1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #include "tm-constrs.h"
56 #ifndef CHECK_STACK_LIMIT
57 #define CHECK_STACK_LIMIT (-1)
60 /* Return index of given mode in mult and division cost tables. */
61 #define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70 #define COSTS_N_BYTES(N) ((N) * 2)
72 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
75 struct processor_costs size_cost = { /* costs for tuning for size */
76 COSTS_N_BYTES (2), /* cost of an add instruction */
77 COSTS_N_BYTES (3), /* cost of a lea instruction */
78 COSTS_N_BYTES (2), /* variable shift costs */
79 COSTS_N_BYTES (3), /* constant shift costs */
80 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
81 COSTS_N_BYTES (3), /* HI */
82 COSTS_N_BYTES (3), /* SI */
83 COSTS_N_BYTES (3), /* DI */
84 COSTS_N_BYTES (5)}, /* other */
85 0, /* cost of multiply per each bit set */
86 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
87 COSTS_N_BYTES (3), /* HI */
88 COSTS_N_BYTES (3), /* SI */
89 COSTS_N_BYTES (3), /* DI */
90 COSTS_N_BYTES (5)}, /* other */
91 COSTS_N_BYTES (3), /* cost of movsx */
92 COSTS_N_BYTES (3), /* cost of movzx */
95 2, /* cost for loading QImode using movzbl */
96 {2, 2, 2}, /* cost of loading integer registers
97 in QImode, HImode and SImode.
98 Relative to reg-reg move (2). */
99 {2, 2, 2}, /* cost of storing integer registers */
100 2, /* cost of reg,reg fld/fst */
101 {2, 2, 2}, /* cost of loading fp registers
102 in SFmode, DFmode and XFmode */
103 {2, 2, 2}, /* cost of storing fp registers
104 in SFmode, DFmode and XFmode */
105 3, /* cost of moving MMX register */
106 {3, 3}, /* cost of loading MMX registers
107 in SImode and DImode */
108 {3, 3}, /* cost of storing MMX registers
109 in SImode and DImode */
110 3, /* cost of moving SSE register */
111 {3, 3, 3}, /* cost of loading SSE registers
112 in SImode, DImode and TImode */
113 {3, 3, 3}, /* cost of storing SSE registers
114 in SImode, DImode and TImode */
115 3, /* MMX or SSE register to integer */
116 0, /* size of prefetch block */
117 0, /* number of parallel prefetches */
119 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
120 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
121 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
122 COSTS_N_BYTES (2), /* cost of FABS instruction. */
123 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
124 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
126 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
127 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
128 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
131 /* Processor costs (relative to an add) */
133 struct processor_costs i386_cost = { /* 386 specific costs */
134 COSTS_N_INSNS (1), /* cost of an add instruction */
135 COSTS_N_INSNS (1), /* cost of a lea instruction */
136 COSTS_N_INSNS (3), /* variable shift costs */
137 COSTS_N_INSNS (2), /* constant shift costs */
138 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
139 COSTS_N_INSNS (6), /* HI */
140 COSTS_N_INSNS (6), /* SI */
141 COSTS_N_INSNS (6), /* DI */
142 COSTS_N_INSNS (6)}, /* other */
143 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
144 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
145 COSTS_N_INSNS (23), /* HI */
146 COSTS_N_INSNS (23), /* SI */
147 COSTS_N_INSNS (23), /* DI */
148 COSTS_N_INSNS (23)}, /* other */
149 COSTS_N_INSNS (3), /* cost of movsx */
150 COSTS_N_INSNS (2), /* cost of movzx */
151 15, /* "large" insn */
153 4, /* cost for loading QImode using movzbl */
154 {2, 4, 2}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 4, 2}, /* cost of storing integer registers */
158 2, /* cost of reg,reg fld/fst */
159 {8, 8, 8}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {8, 8, 8}, /* cost of storing fp registers
162 in SFmode, DFmode and XFmode */
163 2, /* cost of moving MMX register */
164 {4, 8}, /* cost of loading MMX registers
165 in SImode and DImode */
166 {4, 8}, /* cost of storing MMX registers
167 in SImode and DImode */
168 2, /* cost of moving SSE register */
169 {4, 8, 16}, /* cost of loading SSE registers
170 in SImode, DImode and TImode */
171 {4, 8, 16}, /* cost of storing SSE registers
172 in SImode, DImode and TImode */
173 3, /* MMX or SSE register to integer */
174 0, /* size of prefetch block */
175 0, /* number of parallel prefetches */
177 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
178 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
179 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
180 COSTS_N_INSNS (22), /* cost of FABS instruction. */
181 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
182 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
183 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
184 DUMMY_STRINGOP_ALGS},
185 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
186 DUMMY_STRINGOP_ALGS},
190 struct processor_costs i486_cost = { /* 486 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (12), /* HI */
197 COSTS_N_INSNS (12), /* SI */
198 COSTS_N_INSNS (12), /* DI */
199 COSTS_N_INSNS (12)}, /* other */
200 1, /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (40), /* HI */
203 COSTS_N_INSNS (40), /* SI */
204 COSTS_N_INSNS (40), /* DI */
205 COSTS_N_INSNS (40)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
235 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
236 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
237 COSTS_N_INSNS (3), /* cost of FABS instruction. */
238 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
239 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
240 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
241 DUMMY_STRINGOP_ALGS},
242 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
247 struct processor_costs pentium_cost = {
248 COSTS_N_INSNS (1), /* cost of an add instruction */
249 COSTS_N_INSNS (1), /* cost of a lea instruction */
250 COSTS_N_INSNS (4), /* variable shift costs */
251 COSTS_N_INSNS (1), /* constant shift costs */
252 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
253 COSTS_N_INSNS (11), /* HI */
254 COSTS_N_INSNS (11), /* SI */
255 COSTS_N_INSNS (11), /* DI */
256 COSTS_N_INSNS (11)}, /* other */
257 0, /* cost of multiply per each bit set */
258 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
259 COSTS_N_INSNS (25), /* HI */
260 COSTS_N_INSNS (25), /* SI */
261 COSTS_N_INSNS (25), /* DI */
262 COSTS_N_INSNS (25)}, /* other */
263 COSTS_N_INSNS (3), /* cost of movsx */
264 COSTS_N_INSNS (2), /* cost of movzx */
265 8, /* "large" insn */
267 6, /* cost for loading QImode using movzbl */
268 {2, 4, 2}, /* cost of loading integer registers
269 in QImode, HImode and SImode.
270 Relative to reg-reg move (2). */
271 {2, 4, 2}, /* cost of storing integer registers */
272 2, /* cost of reg,reg fld/fst */
273 {2, 2, 6}, /* cost of loading fp registers
274 in SFmode, DFmode and XFmode */
275 {4, 4, 6}, /* cost of storing fp registers
276 in SFmode, DFmode and XFmode */
277 8, /* cost of moving MMX register */
278 {8, 8}, /* cost of loading MMX registers
279 in SImode and DImode */
280 {8, 8}, /* cost of storing MMX registers
281 in SImode and DImode */
282 2, /* cost of moving SSE register */
283 {4, 8, 16}, /* cost of loading SSE registers
284 in SImode, DImode and TImode */
285 {4, 8, 16}, /* cost of storing SSE registers
286 in SImode, DImode and TImode */
287 3, /* MMX or SSE register to integer */
288 0, /* size of prefetch block */
289 0, /* number of parallel prefetches */
291 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
292 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
293 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
294 COSTS_N_INSNS (1), /* cost of FABS instruction. */
295 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
296 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
297 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
298 DUMMY_STRINGOP_ALGS},
299 {{libcall, {{-1, rep_prefix_4_byte}}},
304 struct processor_costs pentiumpro_cost = {
305 COSTS_N_INSNS (1), /* cost of an add instruction */
306 COSTS_N_INSNS (1), /* cost of a lea instruction */
307 COSTS_N_INSNS (1), /* variable shift costs */
308 COSTS_N_INSNS (1), /* constant shift costs */
309 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
310 COSTS_N_INSNS (4), /* HI */
311 COSTS_N_INSNS (4), /* SI */
312 COSTS_N_INSNS (4), /* DI */
313 COSTS_N_INSNS (4)}, /* other */
314 0, /* cost of multiply per each bit set */
315 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
316 COSTS_N_INSNS (17), /* HI */
317 COSTS_N_INSNS (17), /* SI */
318 COSTS_N_INSNS (17), /* DI */
319 COSTS_N_INSNS (17)}, /* other */
320 COSTS_N_INSNS (1), /* cost of movsx */
321 COSTS_N_INSNS (1), /* cost of movzx */
322 8, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 4, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 2, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers
333 in SFmode, DFmode and XFmode */
334 2, /* cost of moving MMX register */
335 {2, 2}, /* cost of loading MMX registers
336 in SImode and DImode */
337 {2, 2}, /* cost of storing MMX registers
338 in SImode and DImode */
339 2, /* cost of moving SSE register */
340 {2, 2, 8}, /* cost of loading SSE registers
341 in SImode, DImode and TImode */
342 {2, 2, 8}, /* cost of storing SSE registers
343 in SImode, DImode and TImode */
344 3, /* MMX or SSE register to integer */
345 32, /* size of prefetch block */
346 6, /* number of parallel prefetches */
348 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (2), /* cost of FABS instruction. */
352 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
354 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
355 the alignment). For small blocks inline loop is still a noticeable win, for bigger
356 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
357 more expensive startup time in CPU, but after 4K the difference is down in the noise.
359 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
360 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
361 DUMMY_STRINGOP_ALGS},
362 {{rep_prefix_4_byte, {{1024, unrolled_loop},
363 {8192, rep_prefix_4_byte}, {-1, libcall}}},
368 struct processor_costs geode_cost = {
369 COSTS_N_INSNS (1), /* cost of an add instruction */
370 COSTS_N_INSNS (1), /* cost of a lea instruction */
371 COSTS_N_INSNS (2), /* variable shift costs */
372 COSTS_N_INSNS (1), /* constant shift costs */
373 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
374 COSTS_N_INSNS (4), /* HI */
375 COSTS_N_INSNS (7), /* SI */
376 COSTS_N_INSNS (7), /* DI */
377 COSTS_N_INSNS (7)}, /* other */
378 0, /* cost of multiply per each bit set */
379 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
380 COSTS_N_INSNS (23), /* HI */
381 COSTS_N_INSNS (39), /* SI */
382 COSTS_N_INSNS (39), /* DI */
383 COSTS_N_INSNS (39)}, /* other */
384 COSTS_N_INSNS (1), /* cost of movsx */
385 COSTS_N_INSNS (1), /* cost of movzx */
386 8, /* "large" insn */
388 1, /* cost for loading QImode using movzbl */
389 {1, 1, 1}, /* cost of loading integer registers
390 in QImode, HImode and SImode.
391 Relative to reg-reg move (2). */
392 {1, 1, 1}, /* cost of storing integer registers */
393 1, /* cost of reg,reg fld/fst */
394 {1, 1, 1}, /* cost of loading fp registers
395 in SFmode, DFmode and XFmode */
396 {4, 6, 6}, /* cost of storing fp registers
397 in SFmode, DFmode and XFmode */
399 1, /* cost of moving MMX register */
400 {1, 1}, /* cost of loading MMX registers
401 in SImode and DImode */
402 {1, 1}, /* cost of storing MMX registers
403 in SImode and DImode */
404 1, /* cost of moving SSE register */
405 {1, 1, 1}, /* cost of loading SSE registers
406 in SImode, DImode and TImode */
407 {1, 1, 1}, /* cost of storing SSE registers
408 in SImode, DImode and TImode */
409 1, /* MMX or SSE register to integer */
410 32, /* size of prefetch block */
411 1, /* number of parallel prefetches */
413 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
414 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
415 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
416 COSTS_N_INSNS (1), /* cost of FABS instruction. */
417 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
418 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
419 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
420 DUMMY_STRINGOP_ALGS},
421 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
426 struct processor_costs k6_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (2), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (3), /* HI */
433 COSTS_N_INSNS (3), /* SI */
434 COSTS_N_INSNS (3), /* DI */
435 COSTS_N_INSNS (3)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (18), /* HI */
439 COSTS_N_INSNS (18), /* SI */
440 COSTS_N_INSNS (18), /* DI */
441 COSTS_N_INSNS (18)}, /* other */
442 COSTS_N_INSNS (2), /* cost of movsx */
443 COSTS_N_INSNS (2), /* cost of movzx */
444 8, /* "large" insn */
446 3, /* cost for loading QImode using movzbl */
447 {4, 5, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 3, 2}, /* cost of storing integer registers */
451 4, /* cost of reg,reg fld/fst */
452 {6, 6, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 4}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 6, /* MMX or SSE register to integer */
467 32, /* size of prefetch block */
468 1, /* number of parallel prefetches */
470 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
471 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
472 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
473 COSTS_N_INSNS (2), /* cost of FABS instruction. */
474 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
475 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
476 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
477 DUMMY_STRINGOP_ALGS},
478 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
483 struct processor_costs athlon_cost = {
484 COSTS_N_INSNS (1), /* cost of an add instruction */
485 COSTS_N_INSNS (2), /* cost of a lea instruction */
486 COSTS_N_INSNS (1), /* variable shift costs */
487 COSTS_N_INSNS (1), /* constant shift costs */
488 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
489 COSTS_N_INSNS (5), /* HI */
490 COSTS_N_INSNS (5), /* SI */
491 COSTS_N_INSNS (5), /* DI */
492 COSTS_N_INSNS (5)}, /* other */
493 0, /* cost of multiply per each bit set */
494 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
495 COSTS_N_INSNS (26), /* HI */
496 COSTS_N_INSNS (42), /* SI */
497 COSTS_N_INSNS (74), /* DI */
498 COSTS_N_INSNS (74)}, /* other */
499 COSTS_N_INSNS (1), /* cost of movsx */
500 COSTS_N_INSNS (1), /* cost of movzx */
501 8, /* "large" insn */
503 4, /* cost for loading QImode using movzbl */
504 {3, 4, 3}, /* cost of loading integer registers
505 in QImode, HImode and SImode.
506 Relative to reg-reg move (2). */
507 {3, 4, 3}, /* cost of storing integer registers */
508 4, /* cost of reg,reg fld/fst */
509 {4, 4, 12}, /* cost of loading fp registers
510 in SFmode, DFmode and XFmode */
511 {6, 6, 8}, /* cost of storing fp registers
512 in SFmode, DFmode and XFmode */
513 2, /* cost of moving MMX register */
514 {4, 4}, /* cost of loading MMX registers
515 in SImode and DImode */
516 {4, 4}, /* cost of storing MMX registers
517 in SImode and DImode */
518 2, /* cost of moving SSE register */
519 {4, 4, 6}, /* cost of loading SSE registers
520 in SImode, DImode and TImode */
521 {4, 4, 5}, /* cost of storing SSE registers
522 in SImode, DImode and TImode */
523 5, /* MMX or SSE register to integer */
524 64, /* size of prefetch block */
525 6, /* number of parallel prefetches */
527 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
528 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
529 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
530 COSTS_N_INSNS (2), /* cost of FABS instruction. */
531 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
532 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
533 /* For some reason, Athlon deals better with REP prefix (relative to loops)
534 compared to K8. Alignment becomes important after 8 bytes for memcpy and
535 128 bytes for memset. */
536 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
537 DUMMY_STRINGOP_ALGS},
538 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
543 struct processor_costs k8_cost = {
544 COSTS_N_INSNS (1), /* cost of an add instruction */
545 COSTS_N_INSNS (2), /* cost of a lea instruction */
546 COSTS_N_INSNS (1), /* variable shift costs */
547 COSTS_N_INSNS (1), /* constant shift costs */
548 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
549 COSTS_N_INSNS (4), /* HI */
550 COSTS_N_INSNS (3), /* SI */
551 COSTS_N_INSNS (4), /* DI */
552 COSTS_N_INSNS (5)}, /* other */
553 0, /* cost of multiply per each bit set */
554 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
555 COSTS_N_INSNS (26), /* HI */
556 COSTS_N_INSNS (42), /* SI */
557 COSTS_N_INSNS (74), /* DI */
558 COSTS_N_INSNS (74)}, /* other */
559 COSTS_N_INSNS (1), /* cost of movsx */
560 COSTS_N_INSNS (1), /* cost of movzx */
561 8, /* "large" insn */
563 4, /* cost for loading QImode using movzbl */
564 {3, 4, 3}, /* cost of loading integer registers
565 in QImode, HImode and SImode.
566 Relative to reg-reg move (2). */
567 {3, 4, 3}, /* cost of storing integer registers */
568 4, /* cost of reg,reg fld/fst */
569 {4, 4, 12}, /* cost of loading fp registers
570 in SFmode, DFmode and XFmode */
571 {6, 6, 8}, /* cost of storing fp registers
572 in SFmode, DFmode and XFmode */
573 2, /* cost of moving MMX register */
574 {3, 3}, /* cost of loading MMX registers
575 in SImode and DImode */
576 {4, 4}, /* cost of storing MMX registers
577 in SImode and DImode */
578 2, /* cost of moving SSE register */
579 {4, 3, 6}, /* cost of loading SSE registers
580 in SImode, DImode and TImode */
581 {4, 4, 5}, /* cost of storing SSE registers
582 in SImode, DImode and TImode */
583 5, /* MMX or SSE register to integer */
584 64, /* size of prefetch block */
585 /* New AMD processors never drop prefetches; if they cannot be performed
586 immediately, they are queued. We set number of simultaneous prefetches
587 to a large constant to reflect this (it probably is not a good idea not
588 to limit number of prefetches at all, as their execution also takes some
590 100, /* number of parallel prefetches */
592 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
593 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
594 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
595 COSTS_N_INSNS (2), /* cost of FABS instruction. */
596 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
597 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
598 /* K8 has optimized REP instruction for medium sized blocks, but for very small
599 blocks it is better to use loop. For large blocks, libcall can do
600 nontemporary accesses and beat inline considerably. */
601 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
602 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
603 {{libcall, {{8, loop}, {24, unrolled_loop},
604 {2048, rep_prefix_4_byte}, {-1, libcall}}},
605 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
608 struct processor_costs amdfam10_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (2), /* cost of a lea instruction */
611 COSTS_N_INSNS (1), /* variable shift costs */
612 COSTS_N_INSNS (1), /* constant shift costs */
613 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (4), /* HI */
615 COSTS_N_INSNS (3), /* SI */
616 COSTS_N_INSNS (4), /* DI */
617 COSTS_N_INSNS (5)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (35), /* HI */
621 COSTS_N_INSNS (51), /* SI */
622 COSTS_N_INSNS (83), /* DI */
623 COSTS_N_INSNS (83)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 8, /* "large" insn */
628 4, /* cost for loading QImode using movzbl */
629 {3, 4, 3}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {3, 4, 3}, /* cost of storing integer registers */
633 4, /* cost of reg,reg fld/fst */
634 {4, 4, 12}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {6, 6, 8}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {3, 3}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {4, 4}, /* cost of storing MMX registers
642 in SImode and DImode */
643 2, /* cost of moving SSE register */
644 {4, 4, 3}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {4, 4, 5}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 3, /* MMX or SSE register to integer */
650 MOVD reg64, xmmreg Double FSTORE 4
651 MOVD reg32, xmmreg Double FSTORE 4
653 MOVD reg64, xmmreg Double FADD 3
655 MOVD reg32, xmmreg Double FADD 3
657 64, /* size of prefetch block */
658 /* New AMD processors never drop prefetches; if they cannot be performed
659 immediately, they are queued. We set number of simultaneous prefetches
660 to a large constant to reflect this (it probably is not a good idea not
661 to limit number of prefetches at all, as their execution also takes some
663 100, /* number of parallel prefetches */
665 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
666 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
667 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
668 COSTS_N_INSNS (2), /* cost of FABS instruction. */
669 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
670 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
672 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
673 very small blocks it is better to use loop. For large blocks, libcall can
674 do nontemporary accesses and beat inline considerably. */
675 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
676 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
677 {{libcall, {{8, loop}, {24, unrolled_loop},
678 {2048, rep_prefix_4_byte}, {-1, libcall}}},
679 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
683 struct processor_costs pentium4_cost = {
684 COSTS_N_INSNS (1), /* cost of an add instruction */
685 COSTS_N_INSNS (3), /* cost of a lea instruction */
686 COSTS_N_INSNS (4), /* variable shift costs */
687 COSTS_N_INSNS (4), /* constant shift costs */
688 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
689 COSTS_N_INSNS (15), /* HI */
690 COSTS_N_INSNS (15), /* SI */
691 COSTS_N_INSNS (15), /* DI */
692 COSTS_N_INSNS (15)}, /* other */
693 0, /* cost of multiply per each bit set */
694 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
695 COSTS_N_INSNS (56), /* HI */
696 COSTS_N_INSNS (56), /* SI */
697 COSTS_N_INSNS (56), /* DI */
698 COSTS_N_INSNS (56)}, /* other */
699 COSTS_N_INSNS (1), /* cost of movsx */
700 COSTS_N_INSNS (1), /* cost of movzx */
701 16, /* "large" insn */
703 2, /* cost for loading QImode using movzbl */
704 {4, 5, 4}, /* cost of loading integer registers
705 in QImode, HImode and SImode.
706 Relative to reg-reg move (2). */
707 {2, 3, 2}, /* cost of storing integer registers */
708 2, /* cost of reg,reg fld/fst */
709 {2, 2, 6}, /* cost of loading fp registers
710 in SFmode, DFmode and XFmode */
711 {4, 4, 6}, /* cost of storing fp registers
712 in SFmode, DFmode and XFmode */
713 2, /* cost of moving MMX register */
714 {2, 2}, /* cost of loading MMX registers
715 in SImode and DImode */
716 {2, 2}, /* cost of storing MMX registers
717 in SImode and DImode */
718 12, /* cost of moving SSE register */
719 {12, 12, 12}, /* cost of loading SSE registers
720 in SImode, DImode and TImode */
721 {2, 2, 8}, /* cost of storing SSE registers
722 in SImode, DImode and TImode */
723 10, /* MMX or SSE register to integer */
724 64, /* size of prefetch block */
725 6, /* number of parallel prefetches */
727 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
728 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
729 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
730 COSTS_N_INSNS (2), /* cost of FABS instruction. */
731 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
732 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
733 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
734 DUMMY_STRINGOP_ALGS},
735 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
737 DUMMY_STRINGOP_ALGS},
741 struct processor_costs nocona_cost = {
742 COSTS_N_INSNS (1), /* cost of an add instruction */
743 COSTS_N_INSNS (1), /* cost of a lea instruction */
744 COSTS_N_INSNS (1), /* variable shift costs */
745 COSTS_N_INSNS (1), /* constant shift costs */
746 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
747 COSTS_N_INSNS (10), /* HI */
748 COSTS_N_INSNS (10), /* SI */
749 COSTS_N_INSNS (10), /* DI */
750 COSTS_N_INSNS (10)}, /* other */
751 0, /* cost of multiply per each bit set */
752 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
753 COSTS_N_INSNS (66), /* HI */
754 COSTS_N_INSNS (66), /* SI */
755 COSTS_N_INSNS (66), /* DI */
756 COSTS_N_INSNS (66)}, /* other */
757 COSTS_N_INSNS (1), /* cost of movsx */
758 COSTS_N_INSNS (1), /* cost of movzx */
759 16, /* "large" insn */
761 4, /* cost for loading QImode using movzbl */
762 {4, 4, 4}, /* cost of loading integer registers
763 in QImode, HImode and SImode.
764 Relative to reg-reg move (2). */
765 {4, 4, 4}, /* cost of storing integer registers */
766 3, /* cost of reg,reg fld/fst */
767 {12, 12, 12}, /* cost of loading fp registers
768 in SFmode, DFmode and XFmode */
769 {4, 4, 4}, /* cost of storing fp registers
770 in SFmode, DFmode and XFmode */
771 6, /* cost of moving MMX register */
772 {12, 12}, /* cost of loading MMX registers
773 in SImode and DImode */
774 {12, 12}, /* cost of storing MMX registers
775 in SImode and DImode */
776 6, /* cost of moving SSE register */
777 {12, 12, 12}, /* cost of loading SSE registers
778 in SImode, DImode and TImode */
779 {12, 12, 12}, /* cost of storing SSE registers
780 in SImode, DImode and TImode */
781 8, /* MMX or SSE register to integer */
782 128, /* size of prefetch block */
783 8, /* number of parallel prefetches */
785 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
786 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
787 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
788 COSTS_N_INSNS (3), /* cost of FABS instruction. */
789 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
790 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
791 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
792 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
793 {100000, unrolled_loop}, {-1, libcall}}}},
794 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
796 {libcall, {{24, loop}, {64, unrolled_loop},
797 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
801 struct processor_costs core2_cost = {
802 COSTS_N_INSNS (1), /* cost of an add instruction */
803 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
804 COSTS_N_INSNS (1), /* variable shift costs */
805 COSTS_N_INSNS (1), /* constant shift costs */
806 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
807 COSTS_N_INSNS (3), /* HI */
808 COSTS_N_INSNS (3), /* SI */
809 COSTS_N_INSNS (3), /* DI */
810 COSTS_N_INSNS (3)}, /* other */
811 0, /* cost of multiply per each bit set */
812 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
813 COSTS_N_INSNS (22), /* HI */
814 COSTS_N_INSNS (22), /* SI */
815 COSTS_N_INSNS (22), /* DI */
816 COSTS_N_INSNS (22)}, /* other */
817 COSTS_N_INSNS (1), /* cost of movsx */
818 COSTS_N_INSNS (1), /* cost of movzx */
819 8, /* "large" insn */
821 2, /* cost for loading QImode using movzbl */
822 {6, 6, 6}, /* cost of loading integer registers
823 in QImode, HImode and SImode.
824 Relative to reg-reg move (2). */
825 {4, 4, 4}, /* cost of storing integer registers */
826 2, /* cost of reg,reg fld/fst */
827 {6, 6, 6}, /* cost of loading fp registers
828 in SFmode, DFmode and XFmode */
829 {4, 4, 4}, /* cost of loading integer registers */
830 2, /* cost of moving MMX register */
831 {6, 6}, /* cost of loading MMX registers
832 in SImode and DImode */
833 {4, 4}, /* cost of storing MMX registers
834 in SImode and DImode */
835 2, /* cost of moving SSE register */
836 {6, 6, 6}, /* cost of loading SSE registers
837 in SImode, DImode and TImode */
838 {4, 4, 4}, /* cost of storing SSE registers
839 in SImode, DImode and TImode */
840 2, /* MMX or SSE register to integer */
841 128, /* size of prefetch block */
842 8, /* number of parallel prefetches */
844 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
845 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
846 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
847 COSTS_N_INSNS (1), /* cost of FABS instruction. */
848 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
849 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
850 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
851 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
852 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
853 {{libcall, {{8, loop}, {15, unrolled_loop},
854 {2048, rep_prefix_4_byte}, {-1, libcall}}},
855 {libcall, {{24, loop}, {32, unrolled_loop},
856 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
859 /* Generic64 should produce code tuned for Nocona and K8. */
861 struct processor_costs generic64_cost = {
862 COSTS_N_INSNS (1), /* cost of an add instruction */
863 /* On all chips taken into consideration lea is 2 cycles and more. With
864 this cost however our current implementation of synth_mult results in
865 use of unnecessary temporary registers causing regression on several
866 SPECfp benchmarks. */
867 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
868 COSTS_N_INSNS (1), /* variable shift costs */
869 COSTS_N_INSNS (1), /* constant shift costs */
870 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
871 COSTS_N_INSNS (4), /* HI */
872 COSTS_N_INSNS (3), /* SI */
873 COSTS_N_INSNS (4), /* DI */
874 COSTS_N_INSNS (2)}, /* other */
875 0, /* cost of multiply per each bit set */
876 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
877 COSTS_N_INSNS (26), /* HI */
878 COSTS_N_INSNS (42), /* SI */
879 COSTS_N_INSNS (74), /* DI */
880 COSTS_N_INSNS (74)}, /* other */
881 COSTS_N_INSNS (1), /* cost of movsx */
882 COSTS_N_INSNS (1), /* cost of movzx */
883 8, /* "large" insn */
885 4, /* cost for loading QImode using movzbl */
886 {4, 4, 4}, /* cost of loading integer registers
887 in QImode, HImode and SImode.
888 Relative to reg-reg move (2). */
889 {4, 4, 4}, /* cost of storing integer registers */
890 4, /* cost of reg,reg fld/fst */
891 {12, 12, 12}, /* cost of loading fp registers
892 in SFmode, DFmode and XFmode */
893 {6, 6, 8}, /* cost of storing fp registers
894 in SFmode, DFmode and XFmode */
895 2, /* cost of moving MMX register */
896 {8, 8}, /* cost of loading MMX registers
897 in SImode and DImode */
898 {8, 8}, /* cost of storing MMX registers
899 in SImode and DImode */
900 2, /* cost of moving SSE register */
901 {8, 8, 8}, /* cost of loading SSE registers
902 in SImode, DImode and TImode */
903 {8, 8, 8}, /* cost of storing SSE registers
904 in SImode, DImode and TImode */
905 5, /* MMX or SSE register to integer */
906 64, /* size of prefetch block */
907 6, /* number of parallel prefetches */
908 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
909 is increased to perhaps more appropriate value of 5. */
911 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
912 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
913 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
914 COSTS_N_INSNS (8), /* cost of FABS instruction. */
915 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
916 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
917 {DUMMY_STRINGOP_ALGS,
918 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
919 {DUMMY_STRINGOP_ALGS,
920 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
923 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
925 struct processor_costs generic32_cost = {
926 COSTS_N_INSNS (1), /* cost of an add instruction */
927 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
928 COSTS_N_INSNS (1), /* variable shift costs */
929 COSTS_N_INSNS (1), /* constant shift costs */
930 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
931 COSTS_N_INSNS (4), /* HI */
932 COSTS_N_INSNS (3), /* SI */
933 COSTS_N_INSNS (4), /* DI */
934 COSTS_N_INSNS (2)}, /* other */
935 0, /* cost of multiply per each bit set */
936 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
937 COSTS_N_INSNS (26), /* HI */
938 COSTS_N_INSNS (42), /* SI */
939 COSTS_N_INSNS (74), /* DI */
940 COSTS_N_INSNS (74)}, /* other */
941 COSTS_N_INSNS (1), /* cost of movsx */
942 COSTS_N_INSNS (1), /* cost of movzx */
943 8, /* "large" insn */
945 4, /* cost for loading QImode using movzbl */
946 {4, 4, 4}, /* cost of loading integer registers
947 in QImode, HImode and SImode.
948 Relative to reg-reg move (2). */
949 {4, 4, 4}, /* cost of storing integer registers */
950 4, /* cost of reg,reg fld/fst */
951 {12, 12, 12}, /* cost of loading fp registers
952 in SFmode, DFmode and XFmode */
953 {6, 6, 8}, /* cost of storing fp registers
954 in SFmode, DFmode and XFmode */
955 2, /* cost of moving MMX register */
956 {8, 8}, /* cost of loading MMX registers
957 in SImode and DImode */
958 {8, 8}, /* cost of storing MMX registers
959 in SImode and DImode */
960 2, /* cost of moving SSE register */
961 {8, 8, 8}, /* cost of loading SSE registers
962 in SImode, DImode and TImode */
963 {8, 8, 8}, /* cost of storing SSE registers
964 in SImode, DImode and TImode */
965 5, /* MMX or SSE register to integer */
966 64, /* size of prefetch block */
967 6, /* number of parallel prefetches */
969 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
970 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
971 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
972 COSTS_N_INSNS (8), /* cost of FABS instruction. */
973 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
974 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
975 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
976 DUMMY_STRINGOP_ALGS},
977 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
978 DUMMY_STRINGOP_ALGS},
981 const struct processor_costs *ix86_cost = &pentium_cost;
983 /* Processor feature/optimization bitmasks. */
984 #define m_386 (1<<PROCESSOR_I386)
985 #define m_486 (1<<PROCESSOR_I486)
986 #define m_PENT (1<<PROCESSOR_PENTIUM)
987 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
988 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
989 #define m_NOCONA (1<<PROCESSOR_NOCONA)
990 #define m_CORE2 (1<<PROCESSOR_CORE2)
992 #define m_GEODE (1<<PROCESSOR_GEODE)
993 #define m_K6 (1<<PROCESSOR_K6)
994 #define m_K6_GEODE (m_K6 | m_GEODE)
995 #define m_K8 (1<<PROCESSOR_K8)
996 #define m_ATHLON (1<<PROCESSOR_ATHLON)
997 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
998 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
999 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1001 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1002 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1004 /* Generic instruction choice should be common subset of supported CPUs
1005 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1006 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1008 /* Feature tests against the various tunings. */
1009 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1010 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1011 negatively, so enabling for Generic64 seems like good code size
1012 tradeoff. We can't enable it for 32bit generic because it does not
1013 work well with PPro base chips. */
1014 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1016 /* X86_TUNE_PUSH_MEMORY */
1017 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1018 | m_NOCONA | m_CORE2 | m_GENERIC,
1020 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1023 /* X86_TUNE_USE_BIT_TEST */
1026 /* X86_TUNE_UNROLL_STRLEN */
1027 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1029 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1030 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1033 on simulation result. But after P4 was made, no performance benefit
1034 was observed with branch hints. It also increases the code size.
1035 As a result, icc never generates branch hints. */
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on Generic32 compilation setting as well. However
1052 in current implementation the partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro based chips and is in conflict with partial reg
1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
1057 for generic32 for now. */
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1069 /* X86_TUNE_USE_MOV0 */
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1084 /* X86_TUNE_READ_MODIFY */
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1097 /* X86_TUNE_QIMODE_MATH */
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here in between PPro/Pentium4 based chips that thread 128bit
1136 SSE registers as single units versus K8 based chips that divide SSE
1137 registers to two 64bit halves. This knob promotes all store destinations
1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra microop on 64bit SSE units. Experimental results
1140 shows that disabling this option on P4 brings over 20% SPECfp regression,
1141 while enabling it on K8 brings roughly 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just lower part of scalar values in proper format leaving the
1151 upper part undefined. */
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1169 /* X86_TUNE_SHIFT1 */
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1203 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1204 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1207 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1208 vector path on AMD machines. */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1211 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1213 m_K8 | m_GENERIC64 | m_AMDFAM10,
1215 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1219 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1220 but one byte longer. */
1223 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1224 operand that cannot be represented using a modRM byte. The XOR
1225 replacement is long decoded, so this split helps here as well. */
1229 /* Feature tests against the various architecture variations. */
1230 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1231 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1232 ~(m_386 | m_486 | m_PENT | m_K6),
1234 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1237 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1240 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1243 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1247 static const unsigned int x86_accumulate_outgoing_args
1248 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1250 static const unsigned int x86_arch_always_fancy_math_387
1251 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1252 | m_NOCONA | m_CORE2 | m_GENERIC;
1254 static enum stringop_alg stringop_alg = no_stringop;
1256 /* In case the average insn count for single function invocation is
1257 lower than this constant, emit fast (but longer) prologue and
1259 #define FAST_PROLOGUE_INSN_COUNT 20
1261 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1262 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1263 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1264 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1266 /* Array of the smallest class containing reg number REGNO, indexed by
1267 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1269 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1271 /* ax, dx, cx, bx */
1272 AREG, DREG, CREG, BREG,
1273 /* si, di, bp, sp */
1274 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1276 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1277 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1280 /* flags, fpsr, fpcr, frame */
1281 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1283 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1286 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1289 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1290 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1291 /* SSE REX registers */
1292 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1296 /* The "default" register map used in 32bit mode. */
1298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1309 static int const x86_64_int_parameter_registers[6] =
1311 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1312 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1315 static int const x86_64_ms_abi_int_parameter_registers[4] =
1317 2 /*RCX*/, 1 /*RDX*/,
1318 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1321 static int const x86_64_int_return_registers[4] =
1323 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1326 /* The "default" register map used in 64bit mode. */
1327 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1329 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1330 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1331 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1332 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1333 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1334 8,9,10,11,12,13,14,15, /* extended integer registers */
1335 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1338 /* Define the register numbers to be used in Dwarf debugging information.
1339 The SVR4 reference port C compiler uses the following register numbers
1340 in its Dwarf output code:
1341 0 for %eax (gcc regno = 0)
1342 1 for %ecx (gcc regno = 2)
1343 2 for %edx (gcc regno = 1)
1344 3 for %ebx (gcc regno = 3)
1345 4 for %esp (gcc regno = 7)
1346 5 for %ebp (gcc regno = 6)
1347 6 for %esi (gcc regno = 4)
1348 7 for %edi (gcc regno = 5)
1349 The following three DWARF register numbers are never generated by
1350 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1351 believes these numbers have these meanings.
1352 8 for %eip (no gcc equivalent)
1353 9 for %eflags (gcc regno = 17)
1354 10 for %trapno (no gcc equivalent)
1355 It is not at all clear how we should number the FP stack registers
1356 for the x86 architecture. If the version of SDB on x86/svr4 were
1357 a bit less brain dead with respect to floating-point then we would
1358 have a precedent to follow with respect to DWARF register numbers
1359 for x86 FP registers, but the SDB on x86/svr4 is so completely
1360 broken with respect to FP registers that it is hardly worth thinking
1361 of it as something to strive for compatibility with.
1362 The version of x86/svr4 SDB I have at the moment does (partially)
1363 seem to believe that DWARF register number 11 is associated with
1364 the x86 register %st(0), but that's about all. Higher DWARF
1365 register numbers don't seem to be associated with anything in
1366 particular, and even for DWARF regno 11, SDB only seems to under-
1367 stand that it should say that a variable lives in %st(0) (when
1368 asked via an `=' command) if we said it was in DWARF regno 11,
1369 but SDB still prints garbage when asked for the value of the
1370 variable in question (via a `/' command).
1371 (Also note that the labels SDB prints for various FP stack regs
1372 when doing an `x' command are all wrong.)
1373 Note that these problems generally don't affect the native SVR4
1374 C compiler because it doesn't allow the use of -O with -g and
1375 because when it is *not* optimizing, it allocates a memory
1376 location for each floating-point variable, and the memory
1377 location is what gets described in the DWARF AT_location
1378 attribute for the variable in question.
1379 Regardless of the severe mental illness of the x86/svr4 SDB, we
1380 do something sensible here and we use the following DWARF
1381 register numbers. Note that these are all stack-top-relative
1383 11 for %st(0) (gcc regno = 8)
1384 12 for %st(1) (gcc regno = 9)
1385 13 for %st(2) (gcc regno = 10)
1386 14 for %st(3) (gcc regno = 11)
1387 15 for %st(4) (gcc regno = 12)
1388 16 for %st(5) (gcc regno = 13)
1389 17 for %st(6) (gcc regno = 14)
1390 18 for %st(7) (gcc regno = 15)
1392 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1394 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1395 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1396 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1397 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1398 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1399 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1400 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1403 /* Test and compare insns in i386.md store the information needed to
1404 generate branch and scc insns here. */
1406 rtx ix86_compare_op0 = NULL_RTX;
1407 rtx ix86_compare_op1 = NULL_RTX;
1408 rtx ix86_compare_emitted = NULL_RTX;
1410 /* Size of the register save area. */
1411 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1413 /* Define the structure for the machine field in struct function. */
1415 struct stack_local_entry GTY(())
1417 unsigned short mode;
1420 struct stack_local_entry *next;
1423 /* Structure describing stack frame layout.
1424 Stack grows downward:
1430 saved frame pointer if frame_pointer_needed
1431 <- HARD_FRAME_POINTER
1436 [va_arg registers] (
1437 > to_allocate <- FRAME_POINTER
1447 HOST_WIDE_INT frame;
1449 int outgoing_arguments_size;
1452 HOST_WIDE_INT to_allocate;
1453 /* The offsets relative to ARG_POINTER. */
1454 HOST_WIDE_INT frame_pointer_offset;
1455 HOST_WIDE_INT hard_frame_pointer_offset;
1456 HOST_WIDE_INT stack_pointer_offset;
1458 /* When save_regs_using_mov is set, emit prologue using
1459 move instead of push instructions. */
1460 bool save_regs_using_mov;
1463 /* Code model option. */
1464 enum cmodel ix86_cmodel;
1466 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1468 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1470 /* Which unit we are generating floating point math for. */
1471 enum fpmath_unit ix86_fpmath;
1473 /* Which cpu are we scheduling for. */
1474 enum processor_type ix86_tune;
1476 /* Which instruction set architecture to use. */
1477 enum processor_type ix86_arch;
1479 /* true if sse prefetch instruction is not NOOP. */
1480 int x86_prefetch_sse;
1482 /* ix86_regparm_string as a number */
1483 static int ix86_regparm;
1485 /* -mstackrealign option */
1486 extern int ix86_force_align_arg_pointer;
1487 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1489 /* Preferred alignment for stack boundary in bits. */
1490 unsigned int ix86_preferred_stack_boundary;
1492 /* Values 1-5: see jump.c */
1493 int ix86_branch_cost;
1495 /* Variables which are this size or smaller are put in the data/bss
1496 or ldata/lbss sections. */
1498 int ix86_section_threshold = 65536;
1500 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1501 char internal_label_prefix[16];
1502 int internal_label_prefix_len;
1504 /* Fence to use after loop using movnt. */
1507 /* Register class used for passing given 64bit part of the argument.
1508 These represent classes as documented by the PS ABI, with the exception
1509 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1510 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1512 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1513 whenever possible (upper half does contain padding). */
1514 enum x86_64_reg_class
1517 X86_64_INTEGER_CLASS,
1518 X86_64_INTEGERSI_CLASS,
1525 X86_64_COMPLEX_X87_CLASS,
1528 static const char * const x86_64_reg_class_name[] =
1530 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1531 "sseup", "x87", "x87up", "cplx87", "no"
1534 #define MAX_CLASSES 4
1536 /* Table of constants used by fldpi, fldln2, etc.... */
1537 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1538 static bool ext_80387_constants_init = 0;
1541 static struct machine_function * ix86_init_machine_status (void);
1542 static rtx ix86_function_value (tree, tree, bool);
1543 static int ix86_function_regparm (tree, tree);
1544 static void ix86_compute_frame_layout (struct ix86_frame *);
1545 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1549 /* The svr4 ABI for the i386 says that records and unions are returned
1551 #ifndef DEFAULT_PCC_STRUCT_RETURN
1552 #define DEFAULT_PCC_STRUCT_RETURN 1
1555 /* Bit flags that specify the ISA we are compiling for. */
1556 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1558 /* A mask of ix86_isa_flags that includes bit X if X
1559 was set or cleared on the command line. */
1560 static int ix86_isa_flags_explicit;
1562 /* Define a set of ISAs which aren't available for a given ISA. MMX
1563 and SSE ISAs are handled separately. */
1565 #define OPTION_MASK_ISA_MMX_UNSET \
1566 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1567 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1569 #define OPTION_MASK_ISA_SSE_UNSET \
1570 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1571 #define OPTION_MASK_ISA_SSE2_UNSET \
1572 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1573 #define OPTION_MASK_ISA_SSE3_UNSET \
1574 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1575 #define OPTION_MASK_ISA_SSSE3_UNSET \
1576 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1577 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1578 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1579 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1581 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1582 as -msse4.1 -msse4.2. -mno-sse4 should the same as -mno-sse4.1. */
1583 #define OPTION_MASK_ISA_SSE4 \
1584 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1585 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1587 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1589 /* Implement TARGET_HANDLE_OPTION. */
1592 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1600 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1601 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1606 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1609 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1621 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1622 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1627 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1630 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1639 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1640 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1645 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1648 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1654 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1657 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1658 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1663 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1666 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1667 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1672 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1673 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1677 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1682 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1685 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1686 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1695 /* Sometimes certain combinations of command options do not make
1696 sense on a particular target machine. You can define a macro
1697 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1698 defined, is executed once just after all the command options have
1701 Don't use this macro to turn on various extra optimizations for
1702 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1705 override_options (void)
1708 int ix86_tune_defaulted = 0;
1709 unsigned int ix86_arch_mask, ix86_tune_mask;
1711 /* Comes from final.c -- no real reason to change it. */
1712 #define MAX_CODE_ALIGN 16
1716 const struct processor_costs *cost; /* Processor costs */
1717 const int align_loop; /* Default alignments. */
1718 const int align_loop_max_skip;
1719 const int align_jump;
1720 const int align_jump_max_skip;
1721 const int align_func;
1723 const processor_target_table[PROCESSOR_max] =
1725 {&i386_cost, 4, 3, 4, 3, 4},
1726 {&i486_cost, 16, 15, 16, 15, 16},
1727 {&pentium_cost, 16, 7, 16, 7, 16},
1728 {&pentiumpro_cost, 16, 15, 16, 7, 16},
1729 {&geode_cost, 0, 0, 0, 0, 0},
1730 {&k6_cost, 32, 7, 32, 7, 32},
1731 {&athlon_cost, 16, 7, 16, 7, 16},
1732 {&pentium4_cost, 0, 0, 0, 0, 0},
1733 {&k8_cost, 16, 7, 16, 7, 16},
1734 {&nocona_cost, 0, 0, 0, 0, 0},
1735 {&core2_cost, 16, 7, 16, 7, 16},
1736 {&generic32_cost, 16, 7, 16, 7, 16},
1737 {&generic64_cost, 16, 7, 16, 7, 16},
1738 {&amdfam10_cost, 32, 24, 32, 7, 32}
1741 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1748 PTA_PREFETCH_SSE = 1 << 4,
1750 PTA_3DNOW_A = 1 << 6,
1754 PTA_POPCNT = 1 << 10,
1756 PTA_SSE4A = 1 << 12,
1757 PTA_NO_SAHF = 1 << 13,
1758 PTA_SSE4_1 = 1 << 14,
1759 PTA_SSE4_2 = 1 << 15
1764 const char *const name; /* processor name or nickname. */
1765 const enum processor_type processor;
1766 const unsigned /*enum pta_flags*/ flags;
1768 const processor_alias_table[] =
1770 {"i386", PROCESSOR_I386, 0},
1771 {"i486", PROCESSOR_I486, 0},
1772 {"i586", PROCESSOR_PENTIUM, 0},
1773 {"pentium", PROCESSOR_PENTIUM, 0},
1774 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1775 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1776 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1777 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1778 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1779 {"i686", PROCESSOR_PENTIUMPRO, 0},
1780 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1781 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1782 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1783 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1784 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1785 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1786 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1787 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1788 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1789 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1790 | PTA_CX16 | PTA_NO_SAHF)},
1791 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1792 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1795 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1796 |PTA_PREFETCH_SSE)},
1797 {"k6", PROCESSOR_K6, PTA_MMX},
1798 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1799 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1800 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1801 | PTA_PREFETCH_SSE)},
1802 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1803 | PTA_PREFETCH_SSE)},
1804 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1806 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1808 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1810 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1811 | PTA_MMX | PTA_SSE | PTA_SSE2
1813 {"k8", PROCESSOR_K8, (PTA_64BIT
1814 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1815 | PTA_SSE | PTA_SSE2
1817 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1818 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1819 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1821 {"opteron", PROCESSOR_K8, (PTA_64BIT
1822 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1823 | PTA_SSE | PTA_SSE2
1825 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1826 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1827 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1829 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1830 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1831 | PTA_SSE | PTA_SSE2
1833 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1834 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1835 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1837 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1838 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1839 | PTA_SSE | PTA_SSE2
1841 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1842 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1843 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1845 | PTA_CX16 | PTA_ABM)},
1846 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1847 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1848 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1850 | PTA_CX16 | PTA_ABM)},
1851 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1852 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1855 int const pta_size = ARRAY_SIZE (processor_alias_table);
1857 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1858 SUBTARGET_OVERRIDE_OPTIONS;
1861 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1862 SUBSUBTARGET_OVERRIDE_OPTIONS;
1865 /* -fPIC is the default for x86_64. */
1866 if (TARGET_MACHO && TARGET_64BIT)
1869 /* Set the default values for switches whose default depends on TARGET_64BIT
1870 in case they weren't overwritten by command line options. */
1873 /* Mach-O doesn't support omitting the frame pointer for now. */
1874 if (flag_omit_frame_pointer == 2)
1875 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1876 if (flag_asynchronous_unwind_tables == 2)
1877 flag_asynchronous_unwind_tables = 1;
1878 if (flag_pcc_struct_return == 2)
1879 flag_pcc_struct_return = 0;
1883 if (flag_omit_frame_pointer == 2)
1884 flag_omit_frame_pointer = 0;
1885 if (flag_asynchronous_unwind_tables == 2)
1886 flag_asynchronous_unwind_tables = 0;
1887 if (flag_pcc_struct_return == 2)
1888 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1891 /* Need to check -mtune=generic first. */
1892 if (ix86_tune_string)
1894 if (!strcmp (ix86_tune_string, "generic")
1895 || !strcmp (ix86_tune_string, "i686")
1896 /* As special support for cross compilers we read -mtune=native
1897 as -mtune=generic. With native compilers we won't see the
1898 -mtune=native, as it was changed by the driver. */
1899 || !strcmp (ix86_tune_string, "native"))
1902 ix86_tune_string = "generic64";
1904 ix86_tune_string = "generic32";
1906 else if (!strncmp (ix86_tune_string, "generic", 7))
1907 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1911 if (ix86_arch_string)
1912 ix86_tune_string = ix86_arch_string;
1913 if (!ix86_tune_string)
1915 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1916 ix86_tune_defaulted = 1;
1919 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1920 need to use a sensible tune option. */
1921 if (!strcmp (ix86_tune_string, "generic")
1922 || !strcmp (ix86_tune_string, "x86-64")
1923 || !strcmp (ix86_tune_string, "i686"))
1926 ix86_tune_string = "generic64";
1928 ix86_tune_string = "generic32";
1931 if (ix86_stringop_string)
1933 if (!strcmp (ix86_stringop_string, "rep_byte"))
1934 stringop_alg = rep_prefix_1_byte;
1935 else if (!strcmp (ix86_stringop_string, "libcall"))
1936 stringop_alg = libcall;
1937 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1938 stringop_alg = rep_prefix_4_byte;
1939 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1940 stringop_alg = rep_prefix_8_byte;
1941 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1942 stringop_alg = loop_1_byte;
1943 else if (!strcmp (ix86_stringop_string, "loop"))
1944 stringop_alg = loop;
1945 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1946 stringop_alg = unrolled_loop;
1948 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1950 if (!strcmp (ix86_tune_string, "x86-64"))
1951 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1952 "-mtune=generic instead as appropriate.");
1954 if (!ix86_arch_string)
1955 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1956 if (!strcmp (ix86_arch_string, "generic"))
1957 error ("generic CPU can be used only for -mtune= switch");
1958 if (!strncmp (ix86_arch_string, "generic", 7))
1959 error ("bad value (%s) for -march= switch", ix86_arch_string);
1961 if (ix86_cmodel_string != 0)
1963 if (!strcmp (ix86_cmodel_string, "small"))
1964 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1965 else if (!strcmp (ix86_cmodel_string, "medium"))
1966 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1967 else if (!strcmp (ix86_cmodel_string, "large"))
1968 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1970 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1971 else if (!strcmp (ix86_cmodel_string, "32"))
1972 ix86_cmodel = CM_32;
1973 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1974 ix86_cmodel = CM_KERNEL;
1976 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1980 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1981 use of rip-relative addressing. This eliminates fixups that
1982 would otherwise be needed if this object is to be placed in a
1983 DLL, and is essentially just as efficient as direct addressing. */
1984 if (TARGET_64BIT_MS_ABI)
1985 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1986 else if (TARGET_64BIT)
1987 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1989 ix86_cmodel = CM_32;
1991 if (ix86_asm_string != 0)
1994 && !strcmp (ix86_asm_string, "intel"))
1995 ix86_asm_dialect = ASM_INTEL;
1996 else if (!strcmp (ix86_asm_string, "att"))
1997 ix86_asm_dialect = ASM_ATT;
1999 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2001 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2002 error ("code model %qs not supported in the %s bit mode",
2003 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2004 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2005 sorry ("%i-bit mode not compiled in",
2006 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2008 for (i = 0; i < pta_size; i++)
2009 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2011 ix86_arch = processor_alias_table[i].processor;
2012 /* Default cpu tuning to the architecture. */
2013 ix86_tune = ix86_arch;
2015 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2016 error ("CPU you selected does not support x86-64 "
2019 if (processor_alias_table[i].flags & PTA_MMX
2020 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2021 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2022 if (processor_alias_table[i].flags & PTA_3DNOW
2023 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2024 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2025 if (processor_alias_table[i].flags & PTA_3DNOW_A
2026 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2027 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2028 if (processor_alias_table[i].flags & PTA_SSE
2029 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2030 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2031 if (processor_alias_table[i].flags & PTA_SSE2
2032 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2033 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2034 if (processor_alias_table[i].flags & PTA_SSE3
2035 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2036 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2037 if (processor_alias_table[i].flags & PTA_SSSE3
2038 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2039 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2040 if (processor_alias_table[i].flags & PTA_SSE4_1
2041 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2042 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2043 if (processor_alias_table[i].flags & PTA_SSE4_2
2044 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2045 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2046 if (processor_alias_table[i].flags & PTA_SSE4A
2047 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2048 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2050 if (processor_alias_table[i].flags & PTA_ABM)
2052 if (processor_alias_table[i].flags & PTA_CX16)
2053 x86_cmpxchg16b = true;
2054 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2056 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2057 x86_prefetch_sse = true;
2058 if ((processor_alias_table[i].flags & PTA_NO_SAHF) && !TARGET_64BIT)
2065 error ("bad value (%s) for -march= switch", ix86_arch_string);
2067 ix86_arch_mask = 1u << ix86_arch;
2068 for (i = 0; i < X86_ARCH_LAST; ++i)
2069 ix86_arch_features[i] &= ix86_arch_mask;
2071 for (i = 0; i < pta_size; i++)
2072 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2074 ix86_tune = processor_alias_table[i].processor;
2075 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2077 if (ix86_tune_defaulted)
2079 ix86_tune_string = "x86-64";
2080 for (i = 0; i < pta_size; i++)
2081 if (! strcmp (ix86_tune_string,
2082 processor_alias_table[i].name))
2084 ix86_tune = processor_alias_table[i].processor;
2087 error ("CPU you selected does not support x86-64 "
2090 /* Intel CPUs have always interpreted SSE prefetch instructions as
2091 NOPs; so, we can enable SSE prefetch instructions even when
2092 -mtune (rather than -march) points us to a processor that has them.
2093 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2094 higher processors. */
2096 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2097 x86_prefetch_sse = true;
2101 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2103 ix86_tune_mask = 1u << ix86_tune;
2104 for (i = 0; i < X86_TUNE_LAST; ++i)
2105 ix86_tune_features[i] &= ix86_tune_mask;
2108 ix86_cost = &size_cost;
2110 ix86_cost = processor_target_table[ix86_tune].cost;
2112 /* Arrange to set up i386_stack_locals for all functions. */
2113 init_machine_status = ix86_init_machine_status;
2115 /* Validate -mregparm= value. */
2116 if (ix86_regparm_string)
2119 warning (0, "-mregparm is ignored in 64-bit mode");
2120 i = atoi (ix86_regparm_string);
2121 if (i < 0 || i > REGPARM_MAX)
2122 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2127 ix86_regparm = REGPARM_MAX;
2129 /* If the user has provided any of the -malign-* options,
2130 warn and use that value only if -falign-* is not set.
2131 Remove this code in GCC 3.2 or later. */
2132 if (ix86_align_loops_string)
2134 warning (0, "-malign-loops is obsolete, use -falign-loops");
2135 if (align_loops == 0)
2137 i = atoi (ix86_align_loops_string);
2138 if (i < 0 || i > MAX_CODE_ALIGN)
2139 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2141 align_loops = 1 << i;
2145 if (ix86_align_jumps_string)
2147 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2148 if (align_jumps == 0)
2150 i = atoi (ix86_align_jumps_string);
2151 if (i < 0 || i > MAX_CODE_ALIGN)
2152 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2154 align_jumps = 1 << i;
2158 if (ix86_align_funcs_string)
2160 warning (0, "-malign-functions is obsolete, use -falign-functions");
2161 if (align_functions == 0)
2163 i = atoi (ix86_align_funcs_string);
2164 if (i < 0 || i > MAX_CODE_ALIGN)
2165 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2167 align_functions = 1 << i;
2171 /* Default align_* from the processor table. */
2172 if (align_loops == 0)
2174 align_loops = processor_target_table[ix86_tune].align_loop;
2175 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2177 if (align_jumps == 0)
2179 align_jumps = processor_target_table[ix86_tune].align_jump;
2180 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2182 if (align_functions == 0)
2184 align_functions = processor_target_table[ix86_tune].align_func;
2187 /* Validate -mbranch-cost= value, or provide default. */
2188 ix86_branch_cost = ix86_cost->branch_cost;
2189 if (ix86_branch_cost_string)
2191 i = atoi (ix86_branch_cost_string);
2193 error ("-mbranch-cost=%d is not between 0 and 5", i);
2195 ix86_branch_cost = i;
2197 if (ix86_section_threshold_string)
2199 i = atoi (ix86_section_threshold_string);
2201 error ("-mlarge-data-threshold=%d is negative", i);
2203 ix86_section_threshold = i;
2206 if (ix86_tls_dialect_string)
2208 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2209 ix86_tls_dialect = TLS_DIALECT_GNU;
2210 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2211 ix86_tls_dialect = TLS_DIALECT_GNU2;
2212 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2213 ix86_tls_dialect = TLS_DIALECT_SUN;
2215 error ("bad value (%s) for -mtls-dialect= switch",
2216 ix86_tls_dialect_string);
2219 if (ix87_precision_string)
2221 i = atoi (ix87_precision_string);
2222 if (i != 32 && i != 64 && i != 80)
2223 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2228 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2230 /* Enable by default the SSE and MMX builtins. Do allow the user to
2231 explicitly disable any of these. In particular, disabling SSE and
2232 MMX for kernel code is extremely useful. */
2234 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2235 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2238 warning (0, "-mrtd is ignored in 64bit mode");
2242 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2245 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2247 /* i386 ABI does not specify red zone. It still makes sense to use it
2248 when programmer takes care to stack from being destroyed. */
2249 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2250 target_flags |= MASK_NO_RED_ZONE;
2253 /* Keep nonleaf frame pointers. */
2254 if (flag_omit_frame_pointer)
2255 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2256 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2257 flag_omit_frame_pointer = 1;
2259 /* If we're doing fast math, we don't care about comparison order
2260 wrt NaNs. This lets us use a shorter comparison sequence. */
2261 if (flag_finite_math_only)
2262 target_flags &= ~MASK_IEEE_FP;
2264 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2265 since the insns won't need emulation. */
2266 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2267 target_flags &= ~MASK_NO_FANCY_MATH_387;
2269 /* Likewise, if the target doesn't have a 387, or we've specified
2270 software floating point, don't use 387 inline intrinsics. */
2272 target_flags |= MASK_NO_FANCY_MATH_387;
2274 /* Turn on SSE4.1 builtins for -msse4.2. */
2276 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2278 /* Turn on SSSE3 builtins for -msse4.1. */
2280 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2282 /* Turn on SSE3 builtins for -mssse3. */
2284 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2286 /* Turn on SSE3 builtins for -msse4a. */
2288 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2290 /* Turn on SSE2 builtins for -msse3. */
2292 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2294 /* Turn on SSE builtins for -msse2. */
2296 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2298 /* Turn on MMX builtins for -msse. */
2301 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2302 x86_prefetch_sse = true;
2305 /* Turn on MMX builtins for 3Dnow. */
2307 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2309 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2310 if (TARGET_SSE4_2 || TARGET_ABM)
2313 /* Validate -mpreferred-stack-boundary= value, or provide default.
2314 The default of 128 bits is for Pentium III's SSE __m128. We can't
2315 change it because of optimize_size. Otherwise, we can't mix object
2316 files compiled with -Os and -On. */
2317 ix86_preferred_stack_boundary = 128;
2318 if (ix86_preferred_stack_boundary_string)
2320 i = atoi (ix86_preferred_stack_boundary_string);
2321 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2322 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2323 TARGET_64BIT ? 4 : 2);
2325 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2328 /* Accept -msseregparm only if at least SSE support is enabled. */
2329 if (TARGET_SSEREGPARM
2331 error ("-msseregparm used without SSE enabled");
2333 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2334 if (ix86_fpmath_string != 0)
2336 if (! strcmp (ix86_fpmath_string, "387"))
2337 ix86_fpmath = FPMATH_387;
2338 else if (! strcmp (ix86_fpmath_string, "sse"))
2342 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2343 ix86_fpmath = FPMATH_387;
2346 ix86_fpmath = FPMATH_SSE;
2348 else if (! strcmp (ix86_fpmath_string, "387,sse")
2349 || ! strcmp (ix86_fpmath_string, "sse,387"))
2353 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2354 ix86_fpmath = FPMATH_387;
2356 else if (!TARGET_80387)
2358 warning (0, "387 instruction set disabled, using SSE arithmetics");
2359 ix86_fpmath = FPMATH_SSE;
2362 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2365 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2368 /* If the i387 is disabled, then do not return values in it. */
2370 target_flags &= ~MASK_FLOAT_RETURNS;
2372 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2373 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2375 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2377 /* ??? Unwind info is not correct around the CFG unless either a frame
2378 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2379 unwind info generation to be aware of the CFG and propagating states
2381 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2382 || flag_exceptions || flag_non_call_exceptions)
2383 && flag_omit_frame_pointer
2384 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2386 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2387 warning (0, "unwind tables currently require either a frame pointer "
2388 "or -maccumulate-outgoing-args for correctness");
2389 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2392 /* For sane SSE instruction set generation we need fcomi instruction.
2393 It is safe to enable all CMOVE instructions. */
2397 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2400 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2401 p = strchr (internal_label_prefix, 'X');
2402 internal_label_prefix_len = p - internal_label_prefix;
2406 /* When scheduling description is not available, disable scheduler pass
2407 so it won't slow down the compilation and make x87 code slower. */
2408 if (!TARGET_SCHEDULE)
2409 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2411 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2412 set_param_value ("simultaneous-prefetches",
2413 ix86_cost->simultaneous_prefetches);
2414 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2415 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2418 /* Return true if this goes in large data/bss. */
2421 ix86_in_large_data_p (tree exp)
2423 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2426 /* Functions are never large data. */
2427 if (TREE_CODE (exp) == FUNCTION_DECL)
2430 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2432 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2433 if (strcmp (section, ".ldata") == 0
2434 || strcmp (section, ".lbss") == 0)
2440 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2442 /* If this is an incomplete type with size 0, then we can't put it
2443 in data because it might be too big when completed. */
2444 if (!size || size > ix86_section_threshold)
2451 /* Switch to the appropriate section for output of DECL.
2452 DECL is either a `VAR_DECL' node or a constant of some sort.
2453 RELOC indicates whether forming the initial value of DECL requires
2454 link-time relocations. */
2456 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2460 x86_64_elf_select_section (tree decl, int reloc,
2461 unsigned HOST_WIDE_INT align)
2463 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2464 && ix86_in_large_data_p (decl))
2466 const char *sname = NULL;
2467 unsigned int flags = SECTION_WRITE;
2468 switch (categorize_decl_for_section (decl, reloc))
2473 case SECCAT_DATA_REL:
2474 sname = ".ldata.rel";
2476 case SECCAT_DATA_REL_LOCAL:
2477 sname = ".ldata.rel.local";
2479 case SECCAT_DATA_REL_RO:
2480 sname = ".ldata.rel.ro";
2482 case SECCAT_DATA_REL_RO_LOCAL:
2483 sname = ".ldata.rel.ro.local";
2487 flags |= SECTION_BSS;
2490 case SECCAT_RODATA_MERGE_STR:
2491 case SECCAT_RODATA_MERGE_STR_INIT:
2492 case SECCAT_RODATA_MERGE_CONST:
2496 case SECCAT_SRODATA:
2503 /* We don't split these for medium model. Place them into
2504 default sections and hope for best. */
2509 /* We might get called with string constants, but get_named_section
2510 doesn't like them as they are not DECLs. Also, we need to set
2511 flags in that case. */
2513 return get_section (sname, flags, NULL);
2514 return get_named_section (decl, sname, reloc);
2517 return default_elf_select_section (decl, reloc, align);
2520 /* Build up a unique section name, expressed as a
2521 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2522 RELOC indicates whether the initial value of EXP requires
2523 link-time relocations. */
2525 static void ATTRIBUTE_UNUSED
2526 x86_64_elf_unique_section (tree decl, int reloc)
2528 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2529 && ix86_in_large_data_p (decl))
2531 const char *prefix = NULL;
2532 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2533 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2535 switch (categorize_decl_for_section (decl, reloc))
2538 case SECCAT_DATA_REL:
2539 case SECCAT_DATA_REL_LOCAL:
2540 case SECCAT_DATA_REL_RO:
2541 case SECCAT_DATA_REL_RO_LOCAL:
2542 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2545 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2548 case SECCAT_RODATA_MERGE_STR:
2549 case SECCAT_RODATA_MERGE_STR_INIT:
2550 case SECCAT_RODATA_MERGE_CONST:
2551 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2553 case SECCAT_SRODATA:
2560 /* We don't split these for medium model. Place them into
2561 default sections and hope for best. */
2569 plen = strlen (prefix);
2571 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2572 name = targetm.strip_name_encoding (name);
2573 nlen = strlen (name);
2575 string = (char *) alloca (nlen + plen + 1);
2576 memcpy (string, prefix, plen);
2577 memcpy (string + plen, name, nlen + 1);
2579 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2583 default_unique_section (decl, reloc);
2586 #ifdef COMMON_ASM_OP
2587 /* This says how to output assembler code to declare an
2588 uninitialized external linkage data object.
2590 For medium model x86-64 we need to use .largecomm opcode for
2593 x86_elf_aligned_common (FILE *file,
2594 const char *name, unsigned HOST_WIDE_INT size,
2597 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2598 && size > (unsigned int)ix86_section_threshold)
2599 fprintf (file, ".largecomm\t");
2601 fprintf (file, "%s", COMMON_ASM_OP);
2602 assemble_name (file, name);
2603 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2604 size, align / BITS_PER_UNIT);
2608 /* Utility function for targets to use in implementing
2609 ASM_OUTPUT_ALIGNED_BSS. */
2612 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2613 const char *name, unsigned HOST_WIDE_INT size,
2616 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2617 && size > (unsigned int)ix86_section_threshold)
2618 switch_to_section (get_named_section (decl, ".lbss", 0));
2620 switch_to_section (bss_section);
2621 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2622 #ifdef ASM_DECLARE_OBJECT_NAME
2623 last_assemble_variable_decl = decl;
2624 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2626 /* Standard thing is just output label for the object. */
2627 ASM_OUTPUT_LABEL (file, name);
2628 #endif /* ASM_DECLARE_OBJECT_NAME */
2629 ASM_OUTPUT_SKIP (file, size ? size : 1);
2633 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2635 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2636 make the problem with not enough registers even worse. */
2637 #ifdef INSN_SCHEDULING
2639 flag_schedule_insns = 0;
2643 /* The Darwin libraries never set errno, so we might as well
2644 avoid calling them when that's the only reason we would. */
2645 flag_errno_math = 0;
2647 /* The default values of these switches depend on the TARGET_64BIT
2648 that is not known at this moment. Mark these values with 2 and
2649 let user the to override these. In case there is no command line option
2650 specifying them, we will set the defaults in override_options. */
2652 flag_omit_frame_pointer = 2;
2653 flag_pcc_struct_return = 2;
2654 flag_asynchronous_unwind_tables = 2;
2655 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2656 SUBTARGET_OPTIMIZATION_OPTIONS;
2660 /* Decide whether we can make a sibling call to a function. DECL is the
2661 declaration of the function being targeted by the call and EXP is the
2662 CALL_EXPR representing the call. */
2665 ix86_function_ok_for_sibcall (tree decl, tree exp)
2670 /* If we are generating position-independent code, we cannot sibcall
2671 optimize any indirect call, or a direct call to a global function,
2672 as the PLT requires %ebx be live. */
2673 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2680 func = TREE_TYPE (CALL_EXPR_FN (exp));
2681 if (POINTER_TYPE_P (func))
2682 func = TREE_TYPE (func);
2685 /* Check that the return value locations are the same. Like
2686 if we are returning floats on the 80387 register stack, we cannot
2687 make a sibcall from a function that doesn't return a float to a
2688 function that does or, conversely, from a function that does return
2689 a float to a function that doesn't; the necessary stack adjustment
2690 would not be executed. This is also the place we notice
2691 differences in the return value ABI. Note that it is ok for one
2692 of the functions to have void return type as long as the return
2693 value of the other is passed in a register. */
2694 a = ix86_function_value (TREE_TYPE (exp), func, false);
2695 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2697 if (STACK_REG_P (a) || STACK_REG_P (b))
2699 if (!rtx_equal_p (a, b))
2702 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2704 else if (!rtx_equal_p (a, b))
2707 /* If this call is indirect, we'll need to be able to use a call-clobbered
2708 register for the address of the target function. Make sure that all
2709 such registers are not used for passing parameters. */
2710 if (!decl && !TARGET_64BIT)
2714 /* We're looking at the CALL_EXPR, we need the type of the function. */
2715 type = CALL_EXPR_FN (exp); /* pointer expression */
2716 type = TREE_TYPE (type); /* pointer type */
2717 type = TREE_TYPE (type); /* function type */
2719 if (ix86_function_regparm (type, NULL) >= 3)
2721 /* ??? Need to count the actual number of registers to be used,
2722 not the possible number of registers. Fix later. */
2727 /* Dllimport'd functions are also called indirectly. */
2728 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2729 && decl && DECL_DLLIMPORT_P (decl)
2730 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2733 /* If we forced aligned the stack, then sibcalling would unalign the
2734 stack, which may break the called function. */
2735 if (cfun->machine->force_align_arg_pointer)
2738 /* Otherwise okay. That also includes certain types of indirect calls. */
2742 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2743 calling convention attributes;
2744 arguments as in struct attribute_spec.handler. */
2747 ix86_handle_cconv_attribute (tree *node, tree name,
2749 int flags ATTRIBUTE_UNUSED,
2752 if (TREE_CODE (*node) != FUNCTION_TYPE
2753 && TREE_CODE (*node) != METHOD_TYPE
2754 && TREE_CODE (*node) != FIELD_DECL
2755 && TREE_CODE (*node) != TYPE_DECL)
2757 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2758 IDENTIFIER_POINTER (name));
2759 *no_add_attrs = true;
2763 /* Can combine regparm with all attributes but fastcall. */
2764 if (is_attribute_p ("regparm", name))
2768 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2770 error ("fastcall and regparm attributes are not compatible");
2773 cst = TREE_VALUE (args);
2774 if (TREE_CODE (cst) != INTEGER_CST)
2776 warning (OPT_Wattributes,
2777 "%qs attribute requires an integer constant argument",
2778 IDENTIFIER_POINTER (name));
2779 *no_add_attrs = true;
2781 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2783 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2784 IDENTIFIER_POINTER (name), REGPARM_MAX);
2785 *no_add_attrs = true;
2789 && lookup_attribute (ix86_force_align_arg_pointer_string,
2790 TYPE_ATTRIBUTES (*node))
2791 && compare_tree_int (cst, REGPARM_MAX-1))
2793 error ("%s functions limited to %d register parameters",
2794 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2802 /* Do not warn when emulating the MS ABI. */
2803 if (!TARGET_64BIT_MS_ABI)
2804 warning (OPT_Wattributes, "%qs attribute ignored",
2805 IDENTIFIER_POINTER (name));
2806 *no_add_attrs = true;
2810 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2811 if (is_attribute_p ("fastcall", name))
2813 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2815 error ("fastcall and cdecl attributes are not compatible");
2817 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2819 error ("fastcall and stdcall attributes are not compatible");
2821 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2823 error ("fastcall and regparm attributes are not compatible");
2827 /* Can combine stdcall with fastcall (redundant), regparm and
2829 else if (is_attribute_p ("stdcall", name))
2831 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2833 error ("stdcall and cdecl attributes are not compatible");
2835 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2837 error ("stdcall and fastcall attributes are not compatible");
2841 /* Can combine cdecl with regparm and sseregparm. */
2842 else if (is_attribute_p ("cdecl", name))
2844 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2846 error ("stdcall and cdecl attributes are not compatible");
2848 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2850 error ("fastcall and cdecl attributes are not compatible");
2854 /* Can combine sseregparm with all attributes. */
2859 /* Return 0 if the attributes for two types are incompatible, 1 if they
2860 are compatible, and 2 if they are nearly compatible (which causes a
2861 warning to be generated). */
2864 ix86_comp_type_attributes (tree type1, tree type2)
2866 /* Check for mismatch of non-default calling convention. */
2867 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2869 if (TREE_CODE (type1) != FUNCTION_TYPE)
2872 /* Check for mismatched fastcall/regparm types. */
2873 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2874 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2875 || (ix86_function_regparm (type1, NULL)
2876 != ix86_function_regparm (type2, NULL)))
2879 /* Check for mismatched sseregparm types. */
2880 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2881 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2884 /* Check for mismatched return types (cdecl vs stdcall). */
2885 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2886 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2892 /* Return the regparm value for a function with the indicated TYPE and DECL.
2893 DECL may be NULL when calling function indirectly
2894 or considering a libcall. */
2897 ix86_function_regparm (tree type, tree decl)
2900 int regparm = ix86_regparm;
2905 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2907 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2909 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2912 /* Use register calling convention for local functions when possible. */
2913 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2914 && flag_unit_at_a_time && !profile_flag)
2916 struct cgraph_local_info *i = cgraph_local_info (decl);
2919 int local_regparm, globals = 0, regno;
2922 /* Make sure no regparm register is taken by a
2923 global register variable. */
2924 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2925 if (global_regs[local_regparm])
2928 /* We can't use regparm(3) for nested functions as these use
2929 static chain pointer in third argument. */
2930 if (local_regparm == 3
2931 && (decl_function_context (decl)
2932 || ix86_force_align_arg_pointer)
2933 && !DECL_NO_STATIC_CHAIN (decl))
2936 /* If the function realigns its stackpointer, the prologue will
2937 clobber %ecx. If we've already generated code for the callee,
2938 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2939 scanning the attributes for the self-realigning property. */
2940 f = DECL_STRUCT_FUNCTION (decl);
2941 if (local_regparm == 3
2942 && (f ? !!f->machine->force_align_arg_pointer
2943 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2944 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2947 /* Each global register variable increases register preassure,
2948 so the more global reg vars there are, the smaller regparm
2949 optimization use, unless requested by the user explicitly. */
2950 for (regno = 0; regno < 6; regno++)
2951 if (global_regs[regno])
2954 = globals < local_regparm ? local_regparm - globals : 0;
2956 if (local_regparm > regparm)
2957 regparm = local_regparm;
2964 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2965 DFmode (2) arguments in SSE registers for a function with the
2966 indicated TYPE and DECL. DECL may be NULL when calling function
2967 indirectly or considering a libcall. Otherwise return 0. */
2970 ix86_function_sseregparm (tree type, tree decl)
2972 gcc_assert (!TARGET_64BIT);
2974 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2975 by the sseregparm attribute. */
2976 if (TARGET_SSEREGPARM
2977 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2982 error ("Calling %qD with attribute sseregparm without "
2983 "SSE/SSE2 enabled", decl);
2985 error ("Calling %qT with attribute sseregparm without "
2986 "SSE/SSE2 enabled", type);
2993 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2994 (and DFmode for SSE2) arguments in SSE registers. */
2995 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2997 struct cgraph_local_info *i = cgraph_local_info (decl);
2999 return TARGET_SSE2 ? 2 : 1;
3005 /* Return true if EAX is live at the start of the function. Used by
3006 ix86_expand_prologue to determine if we need special help before
3007 calling allocate_stack_worker. */
3010 ix86_eax_live_at_start_p (void)
3012 /* Cheat. Don't bother working forward from ix86_function_regparm
3013 to the function type to whether an actual argument is located in
3014 eax. Instead just look at cfg info, which is still close enough
3015 to correct at this point. This gives false positives for broken
3016 functions that might use uninitialized data that happens to be
3017 allocated in eax, but who cares? */
3018 return REGNO_REG_SET_P (DF_LIVE_OUT (ENTRY_BLOCK_PTR), 0);
3021 /* Return true if TYPE has a variable argument list. */
3024 type_has_variadic_args_p (tree type)
3026 tree n, t = TYPE_ARG_TYPES (type);
3031 while ((n = TREE_CHAIN (t)) != NULL)
3034 return TREE_VALUE (t) != void_type_node;
3037 /* Value is the number of bytes of arguments automatically
3038 popped when returning from a subroutine call.
3039 FUNDECL is the declaration node of the function (as a tree),
3040 FUNTYPE is the data type of the function (as a tree),
3041 or for a library call it is an identifier node for the subroutine name.
3042 SIZE is the number of bytes of arguments passed on the stack.
3044 On the 80386, the RTD insn may be used to pop them if the number
3045 of args is fixed, but if the number is variable then the caller
3046 must pop them all. RTD can't be used for library calls now
3047 because the library is compiled with the Unix compiler.
3048 Use of RTD is a selectable option, since it is incompatible with
3049 standard Unix calling sequences. If the option is not selected,
3050 the caller must always pop the args.
3052 The attribute stdcall is equivalent to RTD on a per module basis. */
3055 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3059 /* None of the 64-bit ABIs pop arguments. */
3063 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3065 /* Cdecl functions override -mrtd, and never pop the stack. */
3066 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3068 /* Stdcall and fastcall functions will pop the stack if not
3070 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3071 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3074 if (rtd && ! type_has_variadic_args_p (funtype))
3078 /* Lose any fake structure return argument if it is passed on the stack. */
3079 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3080 && !KEEP_AGGREGATE_RETURN_POINTER)
3082 int nregs = ix86_function_regparm (funtype, fundecl);
3084 return GET_MODE_SIZE (Pmode);
3090 /* Argument support functions. */
3092 /* Return true when register may be used to pass function parameters. */
3094 ix86_function_arg_regno_p (int regno)
3097 const int *parm_regs;
3102 return (regno < REGPARM_MAX
3103 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3105 return (regno < REGPARM_MAX
3106 || (TARGET_MMX && MMX_REGNO_P (regno)
3107 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3108 || (TARGET_SSE && SSE_REGNO_P (regno)
3109 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3114 if (SSE_REGNO_P (regno) && TARGET_SSE)
3119 if (TARGET_SSE && SSE_REGNO_P (regno)
3120 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3124 /* RAX is used as hidden argument to va_arg functions. */
3125 if (!TARGET_64BIT_MS_ABI && regno == 0)
3128 if (TARGET_64BIT_MS_ABI)
3129 parm_regs = x86_64_ms_abi_int_parameter_registers;
3131 parm_regs = x86_64_int_parameter_registers;
3132 for (i = 0; i < REGPARM_MAX; i++)
3133 if (regno == parm_regs[i])
3138 /* Return if we do not know how to pass TYPE solely in registers. */
3141 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3143 if (must_pass_in_stack_var_size_or_pad (mode, type))
3146 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3147 The layout_type routine is crafty and tries to trick us into passing
3148 currently unsupported vector types on the stack by using TImode. */
3149 return (!TARGET_64BIT && mode == TImode
3150 && type && TREE_CODE (type) != VECTOR_TYPE);
3153 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3154 for a call to a function whose data type is FNTYPE.
3155 For a library call, FNTYPE is 0. */
3158 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3159 tree fntype, /* tree ptr for function decl */
3160 rtx libname, /* SYMBOL_REF of library name or 0 */
3163 memset (cum, 0, sizeof (*cum));
3165 /* Set up the number of registers to use for passing arguments. */
3166 cum->nregs = ix86_regparm;
3168 cum->sse_nregs = SSE_REGPARM_MAX;
3170 cum->mmx_nregs = MMX_REGPARM_MAX;
3171 cum->warn_sse = true;
3172 cum->warn_mmx = true;
3173 cum->maybe_vaarg = (fntype
3174 ? (!TYPE_ARG_TYPES (fntype)
3175 || type_has_variadic_args_p (fntype))
3180 /* If there are variable arguments, then we won't pass anything
3181 in registers in 32-bit mode. */
3182 if (cum->maybe_vaarg)
3192 /* Use ecx and edx registers if function has fastcall attribute,
3193 else look for regparm information. */
3196 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3202 cum->nregs = ix86_function_regparm (fntype, fndecl);
3205 /* Set up the number of SSE registers used for passing SFmode
3206 and DFmode arguments. Warn for mismatching ABI. */
3207 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3211 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3212 But in the case of vector types, it is some vector mode.
3214 When we have only some of our vector isa extensions enabled, then there
3215 are some modes for which vector_mode_supported_p is false. For these
3216 modes, the generic vector support in gcc will choose some non-vector mode
3217 in order to implement the type. By computing the natural mode, we'll
3218 select the proper ABI location for the operand and not depend on whatever
3219 the middle-end decides to do with these vector types. */
3221 static enum machine_mode
3222 type_natural_mode (tree type)
3224 enum machine_mode mode = TYPE_MODE (type);
3226 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3228 HOST_WIDE_INT size = int_size_in_bytes (type);
3229 if ((size == 8 || size == 16)
3230 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3231 && TYPE_VECTOR_SUBPARTS (type) > 1)
3233 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3235 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3236 mode = MIN_MODE_VECTOR_FLOAT;
3238 mode = MIN_MODE_VECTOR_INT;
3240 /* Get the mode which has this inner mode and number of units. */
3241 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3242 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3243 && GET_MODE_INNER (mode) == innermode)
3253 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3254 this may not agree with the mode that the type system has chosen for the
3255 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3256 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3259 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3264 if (orig_mode != BLKmode)
3265 tmp = gen_rtx_REG (orig_mode, regno);
3268 tmp = gen_rtx_REG (mode, regno);
3269 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3270 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3276 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3277 of this code is to classify each 8bytes of incoming argument by the register
3278 class and assign registers accordingly. */
3280 /* Return the union class of CLASS1 and CLASS2.
3281 See the x86-64 PS ABI for details. */
3283 static enum x86_64_reg_class
3284 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3286 /* Rule #1: If both classes are equal, this is the resulting class. */
3287 if (class1 == class2)
3290 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3292 if (class1 == X86_64_NO_CLASS)
3294 if (class2 == X86_64_NO_CLASS)
3297 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3298 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3299 return X86_64_MEMORY_CLASS;
3301 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3302 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3303 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3304 return X86_64_INTEGERSI_CLASS;
3305 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3306 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3307 return X86_64_INTEGER_CLASS;
3309 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3311 if (class1 == X86_64_X87_CLASS
3312 || class1 == X86_64_X87UP_CLASS
3313 || class1 == X86_64_COMPLEX_X87_CLASS
3314 || class2 == X86_64_X87_CLASS
3315 || class2 == X86_64_X87UP_CLASS
3316 || class2 == X86_64_COMPLEX_X87_CLASS)
3317 return X86_64_MEMORY_CLASS;
3319 /* Rule #6: Otherwise class SSE is used. */
3320 return X86_64_SSE_CLASS;
3323 /* Classify the argument of type TYPE and mode MODE.
3324 CLASSES will be filled by the register class used to pass each word
3325 of the operand. The number of words is returned. In case the parameter
3326 should be passed in memory, 0 is returned. As a special case for zero
3327 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3329 BIT_OFFSET is used internally for handling records and specifies offset
3330 of the offset in bits modulo 256 to avoid overflow cases.
3332 See the x86-64 PS ABI for details.
3336 classify_argument (enum machine_mode mode, tree type,
3337 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3339 HOST_WIDE_INT bytes =
3340 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3341 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3343 /* Variable sized entities are always passed/returned in memory. */
3347 if (mode != VOIDmode
3348 && targetm.calls.must_pass_in_stack (mode, type))
3351 if (type && AGGREGATE_TYPE_P (type))
3355 enum x86_64_reg_class subclasses[MAX_CLASSES];
3357 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3361 for (i = 0; i < words; i++)
3362 classes[i] = X86_64_NO_CLASS;
3364 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3365 signalize memory class, so handle it as special case. */
3368 classes[0] = X86_64_NO_CLASS;
3372 /* Classify each field of record and merge classes. */
3373 switch (TREE_CODE (type))
3376 /* And now merge the fields of structure. */
3377 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3379 if (TREE_CODE (field) == FIELD_DECL)
3383 if (TREE_TYPE (field) == error_mark_node)
3386 /* Bitfields are always classified as integer. Handle them
3387 early, since later code would consider them to be
3388 misaligned integers. */
3389 if (DECL_BIT_FIELD (field))
3391 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3392 i < ((int_bit_position (field) + (bit_offset % 64))
3393 + tree_low_cst (DECL_SIZE (field), 0)
3396 merge_classes (X86_64_INTEGER_CLASS,
3401 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3402 TREE_TYPE (field), subclasses,
3403 (int_bit_position (field)
3404 + bit_offset) % 256);
3407 for (i = 0; i < num; i++)
3410 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3412 merge_classes (subclasses[i], classes[i + pos]);
3420 /* Arrays are handled as small records. */
3423 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3424 TREE_TYPE (type), subclasses, bit_offset);
3428 /* The partial classes are now full classes. */
3429 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3430 subclasses[0] = X86_64_SSE_CLASS;
3431 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3432 subclasses[0] = X86_64_INTEGER_CLASS;
3434 for (i = 0; i < words; i++)
3435 classes[i] = subclasses[i % num];
3440 case QUAL_UNION_TYPE:
3441 /* Unions are similar to RECORD_TYPE but offset is always 0.
3443 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3445 if (TREE_CODE (field) == FIELD_DECL)
3449 if (TREE_TYPE (field) == error_mark_node)
3452 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3453 TREE_TYPE (field), subclasses,
3457 for (i = 0; i < num; i++)
3458 classes[i] = merge_classes (subclasses[i], classes[i]);
3467 /* Final merger cleanup. */
3468 for (i = 0; i < words; i++)
3470 /* If one class is MEMORY, everything should be passed in
3472 if (classes[i] == X86_64_MEMORY_CLASS)
3475 /* The X86_64_SSEUP_CLASS should be always preceded by
3476 X86_64_SSE_CLASS. */
3477 if (classes[i] == X86_64_SSEUP_CLASS
3478 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3479 classes[i] = X86_64_SSE_CLASS;
3481 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3482 if (classes[i] == X86_64_X87UP_CLASS
3483 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3484 classes[i] = X86_64_SSE_CLASS;
3489 /* Compute alignment needed. We align all types to natural boundaries with
3490 exception of XFmode that is aligned to 64bits. */
3491 if (mode != VOIDmode && mode != BLKmode)
3493 int mode_alignment = GET_MODE_BITSIZE (mode);
3496 mode_alignment = 128;
3497 else if (mode == XCmode)
3498 mode_alignment = 256;
3499 if (COMPLEX_MODE_P (mode))
3500 mode_alignment /= 2;
3501 /* Misaligned fields are always returned in memory. */
3502 if (bit_offset % mode_alignment)
3506 /* for V1xx modes, just use the base mode */
3507 if (VECTOR_MODE_P (mode)
3508 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3509 mode = GET_MODE_INNER (mode);
3511 /* Classification of atomic types. */
3516 classes[0] = X86_64_SSE_CLASS;
3519 classes[0] = X86_64_SSE_CLASS;
3520 classes[1] = X86_64_SSEUP_CLASS;
3529 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3530 classes[0] = X86_64_INTEGERSI_CLASS;
3532 classes[0] = X86_64_INTEGER_CLASS;
3536 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3541 if (!(bit_offset % 64))
3542 classes[0] = X86_64_SSESF_CLASS;
3544 classes[0] = X86_64_SSE_CLASS;
3547 classes[0] = X86_64_SSEDF_CLASS;
3550 classes[0] = X86_64_X87_CLASS;
3551 classes[1] = X86_64_X87UP_CLASS;
3554 classes[0] = X86_64_SSE_CLASS;
3555 classes[1] = X86_64_SSEUP_CLASS;
3558 classes[0] = X86_64_SSE_CLASS;
3561 classes[0] = X86_64_SSEDF_CLASS;
3562 classes[1] = X86_64_SSEDF_CLASS;
3565 classes[0] = X86_64_COMPLEX_X87_CLASS;
3568 /* This modes is larger than 16 bytes. */
3576 classes[0] = X86_64_SSE_CLASS;
3577 classes[1] = X86_64_SSEUP_CLASS;
3583 classes[0] = X86_64_SSE_CLASS;
3589 gcc_assert (VECTOR_MODE_P (mode));
3594 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3596 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3597 classes[0] = X86_64_INTEGERSI_CLASS;
3599 classes[0] = X86_64_INTEGER_CLASS;
3600 classes[1] = X86_64_INTEGER_CLASS;
3601 return 1 + (bytes > 8);
3605 /* Examine the argument and return set number of register required in each
3606 class. Return 0 iff parameter should be passed in memory. */
3608 examine_argument (enum machine_mode mode, tree type, int in_return,
3609 int *int_nregs, int *sse_nregs)
3611 enum x86_64_reg_class regclass[MAX_CLASSES];
3612 int n = classify_argument (mode, type, regclass, 0);
3618 for (n--; n >= 0; n--)
3619 switch (regclass[n])
3621 case X86_64_INTEGER_CLASS:
3622 case X86_64_INTEGERSI_CLASS:
3625 case X86_64_SSE_CLASS:
3626 case X86_64_SSESF_CLASS:
3627 case X86_64_SSEDF_CLASS:
3630 case X86_64_NO_CLASS:
3631 case X86_64_SSEUP_CLASS:
3633 case X86_64_X87_CLASS:
3634 case X86_64_X87UP_CLASS:
3638 case X86_64_COMPLEX_X87_CLASS:
3639 return in_return ? 2 : 0;
3640 case X86_64_MEMORY_CLASS:
3646 /* Construct container for the argument used by GCC interface. See
3647 FUNCTION_ARG for the detailed description. */
3650 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3651 tree type, int in_return, int nintregs, int nsseregs,
3652 const int *intreg, int sse_regno)
3654 /* The following variables hold the static issued_error state. */
3655 static bool issued_sse_arg_error;
3656 static bool issued_sse_ret_error;
3657 static bool issued_x87_ret_error;
3659 enum machine_mode tmpmode;
3661 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3662 enum x86_64_reg_class regclass[MAX_CLASSES];
3666 int needed_sseregs, needed_intregs;
3667 rtx exp[MAX_CLASSES];
3670 n = classify_argument (mode, type, regclass, 0);
3673 if (!examine_argument (mode, type, in_return, &needed_intregs,
3676 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3679 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3680 some less clueful developer tries to use floating-point anyway. */
3681 if (needed_sseregs && !TARGET_SSE)
3685 if (!issued_sse_ret_error)
3687 error ("SSE register return with SSE disabled");
3688 issued_sse_ret_error = true;
3691 else if (!issued_sse_arg_error)
3693 error ("SSE register argument with SSE disabled");
3694 issued_sse_arg_error = true;
3699 /* Likewise, error if the ABI requires us to return values in the
3700 x87 registers and the user specified -mno-80387. */
3701 if (!TARGET_80387 && in_return)
3702 for (i = 0; i < n; i++)
3703 if (regclass[i] == X86_64_X87_CLASS
3704 || regclass[i] == X86_64_X87UP_CLASS
3705 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3707 if (!issued_x87_ret_error)
3709 error ("x87 register return with x87 disabled");
3710 issued_x87_ret_error = true;
3715 /* First construct simple cases. Avoid SCmode, since we want to use
3716 single register to pass this type. */
3717 if (n == 1 && mode != SCmode)
3718 switch (regclass[0])
3720 case X86_64_INTEGER_CLASS:
3721 case X86_64_INTEGERSI_CLASS:
3722 return gen_rtx_REG (mode, intreg[0]);
3723 case X86_64_SSE_CLASS:
3724 case X86_64_SSESF_CLASS:
3725 case X86_64_SSEDF_CLASS:
3726 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3727 case X86_64_X87_CLASS:
3728 case X86_64_COMPLEX_X87_CLASS:
3729 return gen_rtx_REG (mode, FIRST_STACK_REG);
3730 case X86_64_NO_CLASS:
3731 /* Zero sized array, struct or class. */
3736 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3737 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3738 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3741 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3742 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3743 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3744 && regclass[1] == X86_64_INTEGER_CLASS
3745 && (mode == CDImode || mode == TImode || mode == TFmode)
3746 && intreg[0] + 1 == intreg[1])
3747 return gen_rtx_REG (mode, intreg[0]);
3749 /* Otherwise figure out the entries of the PARALLEL. */
3750 for (i = 0; i < n; i++)
3752 switch (regclass[i])
3754 case X86_64_NO_CLASS:
3756 case X86_64_INTEGER_CLASS:
3757 case X86_64_INTEGERSI_CLASS:
3758 /* Merge TImodes on aligned occasions here too. */
3759 if (i * 8 + 8 > bytes)
3760 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3761 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3765 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3766 if (tmpmode == BLKmode)
3768 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3769 gen_rtx_REG (tmpmode, *intreg),
3773 case X86_64_SSESF_CLASS:
3774 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3775 gen_rtx_REG (SFmode,
3776 SSE_REGNO (sse_regno)),
3780 case X86_64_SSEDF_CLASS:
3781 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3782 gen_rtx_REG (DFmode,
3783 SSE_REGNO (sse_regno)),
3787 case X86_64_SSE_CLASS:
3788 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3792 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3793 gen_rtx_REG (tmpmode,
3794 SSE_REGNO (sse_regno)),
3796 if (tmpmode == TImode)
3805 /* Empty aligned struct, union or class. */
3809 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3810 for (i = 0; i < nexps; i++)
3811 XVECEXP (ret, 0, i) = exp [i];
3815 /* Update the data in CUM to advance over an argument of mode MODE
3816 and data type TYPE. (TYPE is null for libcalls where that information
3817 may not be available.) */
3820 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3821 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3837 cum->words += words;
3838 cum->nregs -= words;
3839 cum->regno += words;
3841 if (cum->nregs <= 0)
3849 if (cum->float_in_sse < 2)
3852 if (cum->float_in_sse < 1)
3863 if (!type || !AGGREGATE_TYPE_P (type))
3865 cum->sse_words += words;
3866 cum->sse_nregs -= 1;
3867 cum->sse_regno += 1;
3868 if (cum->sse_nregs <= 0)
3880 if (!type || !AGGREGATE_TYPE_P (type))
3882 cum->mmx_words += words;
3883 cum->mmx_nregs -= 1;
3884 cum->mmx_regno += 1;
3885 if (cum->mmx_nregs <= 0)
3896 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3897 tree type, HOST_WIDE_INT words)
3899 int int_nregs, sse_nregs;
3901 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3902 cum->words += words;
3903 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3905 cum->nregs -= int_nregs;
3906 cum->sse_nregs -= sse_nregs;
3907 cum->regno += int_nregs;
3908 cum->sse_regno += sse_nregs;
3911 cum->words += words;
3915 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3916 HOST_WIDE_INT words)
3918 /* Otherwise, this should be passed indirect. */
3919 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3921 cum->words += words;
3930 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3931 tree type, int named ATTRIBUTE_UNUSED)
3933 HOST_WIDE_INT bytes, words;
3935 if (mode == BLKmode)
3936 bytes = int_size_in_bytes (type);
3938 bytes = GET_MODE_SIZE (mode);
3939 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3942 mode = type_natural_mode (type);
3944 if (TARGET_64BIT_MS_ABI)
3945 function_arg_advance_ms_64 (cum, bytes, words);
3946 else if (TARGET_64BIT)
3947 function_arg_advance_64 (cum, mode, type, words);
3949 function_arg_advance_32 (cum, mode, type, bytes, words);
3952 /* Define where to put the arguments to a function.
3953 Value is zero to push the argument on the stack,
3954 or a hard register in which to store the argument.
3956 MODE is the argument's machine mode.
3957 TYPE is the data type of the argument (as a tree).
3958 This is null for libcalls where that information may
3960 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3961 the preceding args and about the function being called.
3962 NAMED is nonzero if this argument is a named parameter
3963 (otherwise it is an extra parameter matching an ellipsis). */
3966 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3967 enum machine_mode orig_mode, tree type,
3968 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3970 static bool warnedsse, warnedmmx;
3972 /* Avoid the AL settings for the Unix64 ABI. */
3973 if (mode == VOIDmode)
3989 if (words <= cum->nregs)
3991 int regno = cum->regno;
3993 /* Fastcall allocates the first two DWORD (SImode) or
3994 smaller arguments to ECX and EDX. */
3997 if (mode == BLKmode || mode == DImode)
4000 /* ECX not EAX is the first allocated register. */
4004 return gen_rtx_REG (mode, regno);
4009 if (cum->float_in_sse < 2)
4012 if (cum->float_in_sse < 1)
4022 if (!type || !AGGREGATE_TYPE_P (type))
4024 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4027 warning (0, "SSE vector argument without SSE enabled "
4031 return gen_reg_or_parallel (mode, orig_mode,
4032 cum->sse_regno + FIRST_SSE_REG);
4040 if (!type || !AGGREGATE_TYPE_P (type))
4042 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4045 warning (0, "MMX vector argument without MMX enabled "
4049 return gen_reg_or_parallel (mode, orig_mode,
4050 cum->mmx_regno + FIRST_MMX_REG);
4059 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4060 enum machine_mode orig_mode, tree type)
4062 /* Handle a hidden AL argument containing number of registers
4063 for varargs x86-64 functions. */
4064 if (mode == VOIDmode)
4065 return GEN_INT (cum->maybe_vaarg
4066 ? (cum->sse_nregs < 0
4071 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4073 &x86_64_int_parameter_registers [cum->regno],
4078 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4079 enum machine_mode orig_mode, int named)
4083 /* Avoid the AL settings for the Unix64 ABI. */
4084 if (mode == VOIDmode)
4087 /* If we've run out of registers, it goes on the stack. */
4088 if (cum->nregs == 0)
4091 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4093 /* Only floating point modes are passed in anything but integer regs. */
4094 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4097 regno = cum->regno + FIRST_SSE_REG;
4102 /* Unnamed floating parameters are passed in both the
4103 SSE and integer registers. */
4104 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4105 t2 = gen_rtx_REG (mode, regno);
4106 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4107 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4108 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4112 return gen_reg_or_parallel (mode, orig_mode, regno);
4116 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4117 tree type, int named)
4119 enum machine_mode mode = omode;
4120 HOST_WIDE_INT bytes, words;
4122 if (mode == BLKmode)
4123 bytes = int_size_in_bytes (type);
4125 bytes = GET_MODE_SIZE (mode);
4126 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4128 /* To simplify the code below, represent vector types with a vector mode
4129 even if MMX/SSE are not active. */
4130 if (type && TREE_CODE (type) == VECTOR_TYPE)
4131 mode = type_natural_mode (type);
4133 if (TARGET_64BIT_MS_ABI)
4134 return function_arg_ms_64 (cum, mode, omode, named);
4135 else if (TARGET_64BIT)
4136 return function_arg_64 (cum, mode, omode, type);
4138 return function_arg_32 (cum, mode, omode, type, bytes, words);
4141 /* A C expression that indicates when an argument must be passed by
4142 reference. If nonzero for an argument, a copy of that argument is
4143 made in memory and a pointer to the argument is passed instead of
4144 the argument itself. The pointer is passed in whatever way is
4145 appropriate for passing a pointer to that type. */
4148 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4149 enum machine_mode mode ATTRIBUTE_UNUSED,
4150 tree type, bool named ATTRIBUTE_UNUSED)
4152 if (TARGET_64BIT_MS_ABI)
4156 /* Arrays are passed by reference. */
4157 if (TREE_CODE (type) == ARRAY_TYPE)
4160 if (AGGREGATE_TYPE_P (type))
4162 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4163 are passed by reference. */
4164 int el2 = exact_log2 (int_size_in_bytes (type));
4165 return !(el2 >= 0 && el2 <= 3);
4169 /* __m128 is passed by reference. */
4170 /* ??? How to handle complex? For now treat them as structs,
4171 and pass them by reference if they're too large. */
4172 if (GET_MODE_SIZE (mode) > 8)
4175 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4181 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4182 ABI. Only called if TARGET_SSE. */
4184 contains_128bit_aligned_vector_p (tree type)
4186 enum machine_mode mode = TYPE_MODE (type);
4187 if (SSE_REG_MODE_P (mode)
4188 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4190 if (TYPE_ALIGN (type) < 128)
4193 if (AGGREGATE_TYPE_P (type))
4195 /* Walk the aggregates recursively. */
4196 switch (TREE_CODE (type))
4200 case QUAL_UNION_TYPE:
4204 /* Walk all the structure fields. */
4205 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4207 if (TREE_CODE (field) == FIELD_DECL
4208 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4215 /* Just for use if some languages passes arrays by value. */
4216 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4227 /* Gives the alignment boundary, in bits, of an argument with the
4228 specified mode and type. */
4231 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4235 align = TYPE_ALIGN (type);
4237 align = GET_MODE_ALIGNMENT (mode);
4238 if (align < PARM_BOUNDARY)
4239 align = PARM_BOUNDARY;
4242 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4243 make an exception for SSE modes since these require 128bit
4246 The handling here differs from field_alignment. ICC aligns MMX
4247 arguments to 4 byte boundaries, while structure fields are aligned
4248 to 8 byte boundaries. */
4250 align = PARM_BOUNDARY;
4253 if (!SSE_REG_MODE_P (mode))
4254 align = PARM_BOUNDARY;
4258 if (!contains_128bit_aligned_vector_p (type))
4259 align = PARM_BOUNDARY;
4267 /* Return true if N is a possible register number of function value. */
4270 ix86_function_value_regno_p (int regno)
4277 case FIRST_FLOAT_REG:
4278 if (TARGET_64BIT_MS_ABI)
4280 return TARGET_FLOAT_RETURNS_IN_80387;
4286 if (TARGET_MACHO || TARGET_64BIT)
4294 /* Define how to find the value returned by a function.
4295 VALTYPE is the data type of the value (as a tree).
4296 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4297 otherwise, FUNC is 0. */
4300 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4301 tree fntype, tree fn)
4305 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4306 we normally prevent this case when mmx is not available. However
4307 some ABIs may require the result to be returned like DImode. */
4308 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4309 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4311 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4312 we prevent this case when sse is not available. However some ABIs
4313 may require the result to be returned like integer TImode. */
4314 else if (mode == TImode
4315 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4316 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4318 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4319 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4320 regno = FIRST_FLOAT_REG;
4322 /* Most things go in %eax. */
4325 /* Override FP return register with %xmm0 for local functions when
4326 SSE math is enabled or for functions with sseregparm attribute. */
4327 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4329 int sse_level = ix86_function_sseregparm (fntype, fn);
4330 if ((sse_level >= 1 && mode == SFmode)
4331 || (sse_level == 2 && mode == DFmode))
4332 regno = FIRST_SSE_REG;
4335 return gen_rtx_REG (orig_mode, regno);
4339 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4344 /* Handle libcalls, which don't provide a type node. */
4345 if (valtype == NULL)
4357 return gen_rtx_REG (mode, FIRST_SSE_REG);
4360 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4364 return gen_rtx_REG (mode, 0);
4368 ret = construct_container (mode, orig_mode, valtype, 1,
4369 REGPARM_MAX, SSE_REGPARM_MAX,
4370 x86_64_int_return_registers, 0);
4372 /* For zero sized structures, construct_container returns NULL, but we
4373 need to keep rest of compiler happy by returning meaningful value. */
4375 ret = gen_rtx_REG (orig_mode, 0);
4381 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4383 unsigned int regno = 0;
4387 if (mode == SFmode || mode == DFmode)
4388 regno = FIRST_SSE_REG;
4389 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4390 regno = FIRST_SSE_REG;
4393 return gen_rtx_REG (orig_mode, regno);
4397 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4398 enum machine_mode orig_mode, enum machine_mode mode)
4403 if (fntype_or_decl && DECL_P (fntype_or_decl))
4404 fn = fntype_or_decl;
4405 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4407 if (TARGET_64BIT_MS_ABI)
4408 return function_value_ms_64 (orig_mode, mode);
4409 else if (TARGET_64BIT)
4410 return function_value_64 (orig_mode, mode, valtype);
4412 return function_value_32 (orig_mode, mode, fntype, fn);
4416 ix86_function_value (tree valtype, tree fntype_or_decl,
4417 bool outgoing ATTRIBUTE_UNUSED)
4419 enum machine_mode mode, orig_mode;
4421 orig_mode = TYPE_MODE (valtype);
4422 mode = type_natural_mode (valtype);
4423 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4427 ix86_libcall_value (enum machine_mode mode)
4429 return ix86_function_value_1 (NULL, NULL, mode, mode);
4432 /* Return true iff type is returned in memory. */
4435 return_in_memory_32 (tree type, enum machine_mode mode)
4439 if (mode == BLKmode)
4442 size = int_size_in_bytes (type);
4444 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4447 if (VECTOR_MODE_P (mode) || mode == TImode)
4449 /* User-created vectors small enough to fit in EAX. */
4453 /* MMX/3dNow values are returned in MM0,
4454 except when it doesn't exits. */
4456 return (TARGET_MMX ? 0 : 1);
4458 /* SSE values are returned in XMM0, except when it doesn't exist. */
4460 return (TARGET_SSE ? 0 : 1);
4475 return_in_memory_64 (tree type, enum machine_mode mode)
4477 int needed_intregs, needed_sseregs;
4478 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4482 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4484 HOST_WIDE_INT size = int_size_in_bytes (type);
4486 /* __m128 and friends are returned in xmm0. */
4487 if (size == 16 && VECTOR_MODE_P (mode))
4490 /* Otherwise, the size must be exactly in [1248]. */
4491 return (size != 1 && size != 2 && size != 4 && size != 8);
4495 ix86_return_in_memory (tree type)
4497 enum machine_mode mode = type_natural_mode (type);
4499 if (TARGET_64BIT_MS_ABI)
4500 return return_in_memory_ms_64 (type, mode);
4501 else if (TARGET_64BIT)
4502 return return_in_memory_64 (type, mode);
4504 return return_in_memory_32 (type, mode);
4507 /* Return false iff TYPE is returned in memory. This version is used
4508 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4509 but differs notably in that when MMX is available, 8-byte vectors
4510 are returned in memory, rather than in MMX registers. */
4513 ix86_sol10_return_in_memory (tree type)
4516 enum machine_mode mode = type_natural_mode (type);
4519 return return_in_memory_64 (type, mode);
4521 if (mode == BLKmode)
4524 size = int_size_in_bytes (type);
4526 if (VECTOR_MODE_P (mode))
4528 /* Return in memory only if MMX registers *are* available. This
4529 seems backwards, but it is consistent with the existing
4536 else if (mode == TImode)
4538 else if (mode == XFmode)
4544 /* When returning SSE vector types, we have a choice of either
4545 (1) being abi incompatible with a -march switch, or
4546 (2) generating an error.
4547 Given no good solution, I think the safest thing is one warning.
4548 The user won't be able to use -Werror, but....
4550 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4551 called in response to actually generating a caller or callee that
4552 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4553 via aggregate_value_p for general type probing from tree-ssa. */
4556 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4558 static bool warnedsse, warnedmmx;
4560 if (!TARGET_64BIT && type)
4562 /* Look at the return type of the function, not the function type. */
4563 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4565 if (!TARGET_SSE && !warnedsse)
4568 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4571 warning (0, "SSE vector return without SSE enabled "
4576 if (!TARGET_MMX && !warnedmmx)
4578 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4581 warning (0, "MMX vector return without MMX enabled "
4591 /* Create the va_list data type. */
4594 ix86_build_builtin_va_list (void)
4596 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4598 /* For i386 we use plain pointer to argument area. */
4599 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4600 return build_pointer_type (char_type_node);
4602 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4603 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4605 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4606 unsigned_type_node);
4607 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4608 unsigned_type_node);
4609 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4611 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4614 va_list_gpr_counter_field = f_gpr;
4615 va_list_fpr_counter_field = f_fpr;
4617 DECL_FIELD_CONTEXT (f_gpr) = record;
4618 DECL_FIELD_CONTEXT (f_fpr) = record;
4619 DECL_FIELD_CONTEXT (f_ovf) = record;
4620 DECL_FIELD_CONTEXT (f_sav) = record;
4622 TREE_CHAIN (record) = type_decl;
4623 TYPE_NAME (record) = type_decl;
4624 TYPE_FIELDS (record) = f_gpr;
4625 TREE_CHAIN (f_gpr) = f_fpr;
4626 TREE_CHAIN (f_fpr) = f_ovf;
4627 TREE_CHAIN (f_ovf) = f_sav;
4629 layout_type (record);
4631 /* The correct type is an array type of one element. */
4632 return build_array_type (record, build_index_type (size_zero_node));
4635 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4638 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4648 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4651 /* Indicate to allocate space on the stack for varargs save area. */
4652 ix86_save_varrargs_registers = 1;
4653 cfun->stack_alignment_needed = 128;
4655 save_area = frame_pointer_rtx;
4656 set = get_varargs_alias_set ();
4658 for (i = cum->regno;
4660 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4663 mem = gen_rtx_MEM (Pmode,
4664 plus_constant (save_area, i * UNITS_PER_WORD));
4665 MEM_NOTRAP_P (mem) = 1;
4666 set_mem_alias_set (mem, set);
4667 emit_move_insn (mem, gen_rtx_REG (Pmode,
4668 x86_64_int_parameter_registers[i]));
4671 if (cum->sse_nregs && cfun->va_list_fpr_size)
4673 /* Now emit code to save SSE registers. The AX parameter contains number
4674 of SSE parameter registers used to call this function. We use
4675 sse_prologue_save insn template that produces computed jump across
4676 SSE saves. We need some preparation work to get this working. */
4678 label = gen_label_rtx ();
4679 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4681 /* Compute address to jump to :
4682 label - 5*eax + nnamed_sse_arguments*5 */
4683 tmp_reg = gen_reg_rtx (Pmode);
4684 nsse_reg = gen_reg_rtx (Pmode);
4685 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4686 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4687 gen_rtx_MULT (Pmode, nsse_reg,
4692 gen_rtx_CONST (DImode,
4693 gen_rtx_PLUS (DImode,
4695 GEN_INT (cum->sse_regno * 4))));
4697 emit_move_insn (nsse_reg, label_ref);
4698 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4700 /* Compute address of memory block we save into. We always use pointer
4701 pointing 127 bytes after first byte to store - this is needed to keep
4702 instruction size limited by 4 bytes. */
4703 tmp_reg = gen_reg_rtx (Pmode);
4704 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4705 plus_constant (save_area,
4706 8 * REGPARM_MAX + 127)));
4707 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4708 MEM_NOTRAP_P (mem) = 1;
4709 set_mem_alias_set (mem, set);
4710 set_mem_align (mem, BITS_PER_WORD);
4712 /* And finally do the dirty job! */
4713 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4714 GEN_INT (cum->sse_regno), label));
4719 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4721 int set = get_varargs_alias_set ();
4724 for (i = cum->regno; i < REGPARM_MAX; i++)
4728 mem = gen_rtx_MEM (Pmode,
4729 plus_constant (virtual_incoming_args_rtx,
4730 i * UNITS_PER_WORD));
4731 MEM_NOTRAP_P (mem) = 1;
4732 set_mem_alias_set (mem, set);
4734 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4735 emit_move_insn (mem, reg);
4740 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4741 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4744 CUMULATIVE_ARGS next_cum;
4748 /* This argument doesn't appear to be used anymore. Which is good,
4749 because the old code here didn't suppress rtl generation. */
4750 gcc_assert (!no_rtl);
4755 fntype = TREE_TYPE (current_function_decl);
4756 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4757 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4758 != void_type_node));
4760 /* For varargs, we do not want to skip the dummy va_dcl argument.
4761 For stdargs, we do want to skip the last named argument. */
4764 function_arg_advance (&next_cum, mode, type, 1);
4766 if (TARGET_64BIT_MS_ABI)
4767 setup_incoming_varargs_ms_64 (&next_cum);
4769 setup_incoming_varargs_64 (&next_cum);
4772 /* Implement va_start. */
4775 ix86_va_start (tree valist, rtx nextarg)
4777 HOST_WIDE_INT words, n_gpr, n_fpr;
4778 tree f_gpr, f_fpr, f_ovf, f_sav;
4779 tree gpr, fpr, ovf, sav, t;
4782 /* Only 64bit target needs something special. */
4783 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4785 std_expand_builtin_va_start (valist, nextarg);
4789 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4790 f_fpr = TREE_CHAIN (f_gpr);
4791 f_ovf = TREE_CHAIN (f_fpr);
4792 f_sav = TREE_CHAIN (f_ovf);
4794 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4795 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4796 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4797 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4798 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4800 /* Count number of gp and fp argument registers used. */
4801 words = current_function_args_info.words;
4802 n_gpr = current_function_args_info.regno;
4803 n_fpr = current_function_args_info.sse_regno;
4805 if (cfun->va_list_gpr_size)
4807 type = TREE_TYPE (gpr);
4808 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4809 build_int_cst (type, n_gpr * 8));
4810 TREE_SIDE_EFFECTS (t) = 1;
4811 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4814 if (cfun->va_list_fpr_size)
4816 type = TREE_TYPE (fpr);
4817 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4818 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4819 TREE_SIDE_EFFECTS (t) = 1;
4820 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4823 /* Find the overflow area. */
4824 type = TREE_TYPE (ovf);
4825 t = make_tree (type, virtual_incoming_args_rtx);
4827 t = build2 (PLUS_EXPR, type, t,
4828 build_int_cst (type, words * UNITS_PER_WORD));
4829 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4830 TREE_SIDE_EFFECTS (t) = 1;
4831 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4833 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4835 /* Find the register save area.
4836 Prologue of the function save it right above stack frame. */
4837 type = TREE_TYPE (sav);
4838 t = make_tree (type, frame_pointer_rtx);
4839 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4840 TREE_SIDE_EFFECTS (t) = 1;
4841 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4845 /* Implement va_arg. */
4848 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4850 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4851 tree f_gpr, f_fpr, f_ovf, f_sav;
4852 tree gpr, fpr, ovf, sav, t;
4854 tree lab_false, lab_over = NULL_TREE;
4859 enum machine_mode nat_mode;
4861 /* Only 64bit target needs something special. */
4862 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4863 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4865 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4866 f_fpr = TREE_CHAIN (f_gpr);
4867 f_ovf = TREE_CHAIN (f_fpr);
4868 f_sav = TREE_CHAIN (f_ovf);
4870 valist = build_va_arg_indirect_ref (valist);
4871 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4872 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4873 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4874 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4876 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4878 type = build_pointer_type (type);
4879 size = int_size_in_bytes (type);
4880 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4882 nat_mode = type_natural_mode (type);
4883 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4884 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4886 /* Pull the value out of the saved registers. */
4888 addr = create_tmp_var (ptr_type_node, "addr");
4889 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4893 int needed_intregs, needed_sseregs;
4895 tree int_addr, sse_addr;
4897 lab_false = create_artificial_label ();
4898 lab_over = create_artificial_label ();
4900 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4902 need_temp = (!REG_P (container)
4903 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4904 || TYPE_ALIGN (type) > 128));
4906 /* In case we are passing structure, verify that it is consecutive block
4907 on the register save area. If not we need to do moves. */
4908 if (!need_temp && !REG_P (container))
4910 /* Verify that all registers are strictly consecutive */
4911 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4915 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4917 rtx slot = XVECEXP (container, 0, i);
4918 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4919 || INTVAL (XEXP (slot, 1)) != i * 16)
4927 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4929 rtx slot = XVECEXP (container, 0, i);
4930 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4931 || INTVAL (XEXP (slot, 1)) != i * 8)
4943 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4944 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4945 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4946 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4949 /* First ensure that we fit completely in registers. */
4952 t = build_int_cst (TREE_TYPE (gpr),
4953 (REGPARM_MAX - needed_intregs + 1) * 8);
4954 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4955 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4956 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4957 gimplify_and_add (t, pre_p);
4961 t = build_int_cst (TREE_TYPE (fpr),
4962 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4964 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4965 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4966 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4967 gimplify_and_add (t, pre_p);
4970 /* Compute index to start of area used for integer regs. */
4973 /* int_addr = gpr + sav; */
4974 t = fold_convert (ptr_type_node, fold_convert (size_type_node, gpr));
4975 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4976 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4977 gimplify_and_add (t, pre_p);
4981 /* sse_addr = fpr + sav; */
4982 t = fold_convert (ptr_type_node, fold_convert (size_type_node, fpr));
4983 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4984 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4985 gimplify_and_add (t, pre_p);
4990 tree temp = create_tmp_var (type, "va_arg_tmp");
4993 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4994 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4995 gimplify_and_add (t, pre_p);
4997 for (i = 0; i < XVECLEN (container, 0); i++)
4999 rtx slot = XVECEXP (container, 0, i);
5000 rtx reg = XEXP (slot, 0);
5001 enum machine_mode mode = GET_MODE (reg);
5002 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5003 tree addr_type = build_pointer_type (piece_type);
5006 tree dest_addr, dest;
5008 if (SSE_REGNO_P (REGNO (reg)))
5010 src_addr = sse_addr;
5011 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5015 src_addr = int_addr;
5016 src_offset = REGNO (reg) * 8;
5018 src_addr = fold_convert (addr_type, src_addr);
5019 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
5020 build_int_cst (addr_type, src_offset));
5021 src = build_va_arg_indirect_ref (src_addr);
5023 dest_addr = fold_convert (addr_type, addr);
5024 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
5025 build_int_cst (addr_type, INTVAL (XEXP (slot, 1))));
5026 dest = build_va_arg_indirect_ref (dest_addr);
5028 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5029 gimplify_and_add (t, pre_p);
5035 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5036 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5037 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5038 gimplify_and_add (t, pre_p);
5042 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5043 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5044 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5045 gimplify_and_add (t, pre_p);
5048 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5049 gimplify_and_add (t, pre_p);
5051 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5052 append_to_statement_list (t, pre_p);
5055 /* ... otherwise out of the overflow area. */
5057 /* Care for on-stack alignment if needed. */
5058 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5059 || integer_zerop (TYPE_SIZE (type)))
5063 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5064 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
5065 build_int_cst (TREE_TYPE (ovf), align - 1));
5066 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5067 build_int_cst (TREE_TYPE (t), -align));
5069 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5071 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5072 gimplify_and_add (t2, pre_p);
5074 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5075 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
5076 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5077 gimplify_and_add (t, pre_p);
5081 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5082 append_to_statement_list (t, pre_p);
5085 ptrtype = build_pointer_type (type);
5086 addr = fold_convert (ptrtype, addr);
5089 addr = build_va_arg_indirect_ref (addr);
5090 return build_va_arg_indirect_ref (addr);
5093 /* Return nonzero if OPNUM's MEM should be matched
5094 in movabs* patterns. */
5097 ix86_check_movabs (rtx insn, int opnum)
5101 set = PATTERN (insn);
5102 if (GET_CODE (set) == PARALLEL)
5103 set = XVECEXP (set, 0, 0);
5104 gcc_assert (GET_CODE (set) == SET);
5105 mem = XEXP (set, opnum);
5106 while (GET_CODE (mem) == SUBREG)
5107 mem = SUBREG_REG (mem);
5108 gcc_assert (MEM_P (mem));
5109 return (volatile_ok || !MEM_VOLATILE_P (mem));
5112 /* Initialize the table of extra 80387 mathematical constants. */
5115 init_ext_80387_constants (void)
5117 static const char * cst[5] =
5119 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5120 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5121 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5122 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5123 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5127 for (i = 0; i < 5; i++)
5129 real_from_string (&ext_80387_constants_table[i], cst[i]);
5130 /* Ensure each constant is rounded to XFmode precision. */
5131 real_convert (&ext_80387_constants_table[i],
5132 XFmode, &ext_80387_constants_table[i]);
5135 ext_80387_constants_init = 1;
5138 /* Return true if the constant is something that can be loaded with
5139 a special instruction. */
5142 standard_80387_constant_p (rtx x)
5144 enum machine_mode mode = GET_MODE (x);
5148 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5151 if (x == CONST0_RTX (mode))
5153 if (x == CONST1_RTX (mode))
5156 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5158 /* For XFmode constants, try to find a special 80387 instruction when
5159 optimizing for size or on those CPUs that benefit from them. */
5161 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5165 if (! ext_80387_constants_init)
5166 init_ext_80387_constants ();
5168 for (i = 0; i < 5; i++)
5169 if (real_identical (&r, &ext_80387_constants_table[i]))
5173 /* Load of the constant -0.0 or -1.0 will be split as
5174 fldz;fchs or fld1;fchs sequence. */
5175 if (real_isnegzero (&r))
5177 if (real_identical (&r, &dconstm1))
5183 /* Return the opcode of the special instruction to be used to load
5187 standard_80387_constant_opcode (rtx x)
5189 switch (standard_80387_constant_p (x))
5213 /* Return the CONST_DOUBLE representing the 80387 constant that is
5214 loaded by the specified special instruction. The argument IDX
5215 matches the return value from standard_80387_constant_p. */
5218 standard_80387_constant_rtx (int idx)
5222 if (! ext_80387_constants_init)
5223 init_ext_80387_constants ();
5239 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5243 /* Return 1 if mode is a valid mode for sse. */
5245 standard_sse_mode_p (enum machine_mode mode)
5262 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5265 standard_sse_constant_p (rtx x)
5267 enum machine_mode mode = GET_MODE (x);
5269 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5271 if (vector_all_ones_operand (x, mode)
5272 && standard_sse_mode_p (mode))
5273 return TARGET_SSE2 ? 2 : -1;
5278 /* Return the opcode of the special instruction to be used to load
5282 standard_sse_constant_opcode (rtx insn, rtx x)
5284 switch (standard_sse_constant_p (x))
5287 if (get_attr_mode (insn) == MODE_V4SF)
5288 return "xorps\t%0, %0";
5289 else if (get_attr_mode (insn) == MODE_V2DF)
5290 return "xorpd\t%0, %0";
5292 return "pxor\t%0, %0";
5294 return "pcmpeqd\t%0, %0";
5299 /* Returns 1 if OP contains a symbol reference */
5302 symbolic_reference_mentioned_p (rtx op)
5307 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5310 fmt = GET_RTX_FORMAT (GET_CODE (op));
5311 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5317 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5318 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5322 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5329 /* Return 1 if it is appropriate to emit `ret' instructions in the
5330 body of a function. Do this only if the epilogue is simple, needing a
5331 couple of insns. Prior to reloading, we can't tell how many registers
5332 must be saved, so return 0 then. Return 0 if there is no frame
5333 marker to de-allocate. */
5336 ix86_can_use_return_insn_p (void)
5338 struct ix86_frame frame;
5340 if (! reload_completed || frame_pointer_needed)
5343 /* Don't allow more than 32 pop, since that's all we can do
5344 with one instruction. */
5345 if (current_function_pops_args
5346 && current_function_args_size >= 32768)
5349 ix86_compute_frame_layout (&frame);
5350 return frame.to_allocate == 0 && frame.nregs == 0;
5353 /* Value should be nonzero if functions must have frame pointers.
5354 Zero means the frame pointer need not be set up (and parms may
5355 be accessed via the stack pointer) in functions that seem suitable. */
5358 ix86_frame_pointer_required (void)
5360 /* If we accessed previous frames, then the generated code expects
5361 to be able to access the saved ebp value in our frame. */
5362 if (cfun->machine->accesses_prev_frame)
5365 /* Several x86 os'es need a frame pointer for other reasons,
5366 usually pertaining to setjmp. */
5367 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5370 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5371 the frame pointer by default. Turn it back on now if we've not
5372 got a leaf function. */
5373 if (TARGET_OMIT_LEAF_FRAME_POINTER
5374 && (!current_function_is_leaf
5375 || ix86_current_function_calls_tls_descriptor))
5378 if (current_function_profile)
5384 /* Record that the current function accesses previous call frames. */
5387 ix86_setup_frame_addresses (void)
5389 cfun->machine->accesses_prev_frame = 1;
5392 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5393 # define USE_HIDDEN_LINKONCE 1
5395 # define USE_HIDDEN_LINKONCE 0
5398 static int pic_labels_used;
5400 /* Fills in the label name that should be used for a pc thunk for
5401 the given register. */
5404 get_pc_thunk_name (char name[32], unsigned int regno)
5406 gcc_assert (!TARGET_64BIT);
5408 if (USE_HIDDEN_LINKONCE)
5409 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5411 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5415 /* This function generates code for -fpic that loads %ebx with
5416 the return address of the caller and then returns. */
5419 ix86_file_end (void)
5424 for (regno = 0; regno < 8; ++regno)
5428 if (! ((pic_labels_used >> regno) & 1))
5431 get_pc_thunk_name (name, regno);
5436 switch_to_section (darwin_sections[text_coal_section]);
5437 fputs ("\t.weak_definition\t", asm_out_file);
5438 assemble_name (asm_out_file, name);
5439 fputs ("\n\t.private_extern\t", asm_out_file);
5440 assemble_name (asm_out_file, name);
5441 fputs ("\n", asm_out_file);
5442 ASM_OUTPUT_LABEL (asm_out_file, name);
5446 if (USE_HIDDEN_LINKONCE)
5450 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5452 TREE_PUBLIC (decl) = 1;
5453 TREE_STATIC (decl) = 1;
5454 DECL_ONE_ONLY (decl) = 1;
5456 (*targetm.asm_out.unique_section) (decl, 0);
5457 switch_to_section (get_named_section (decl, NULL, 0));
5459 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5460 fputs ("\t.hidden\t", asm_out_file);
5461 assemble_name (asm_out_file, name);
5462 fputc ('\n', asm_out_file);
5463 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5467 switch_to_section (text_section);
5468 ASM_OUTPUT_LABEL (asm_out_file, name);
5471 xops[0] = gen_rtx_REG (SImode, regno);
5472 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5473 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5474 output_asm_insn ("ret", xops);
5477 if (NEED_INDICATE_EXEC_STACK)
5478 file_end_indicate_exec_stack ();
5481 /* Emit code for the SET_GOT patterns. */
5484 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5490 if (TARGET_VXWORKS_RTP && flag_pic)
5492 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5493 xops[2] = gen_rtx_MEM (Pmode,
5494 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5495 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5497 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5498 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5499 an unadorned address. */
5500 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5501 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5502 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5506 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5508 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5510 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5513 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5515 output_asm_insn ("call\t%a2", xops);
5518 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5519 is what will be referenced by the Mach-O PIC subsystem. */
5521 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5524 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5525 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5528 output_asm_insn ("pop{l}\t%0", xops);
5533 get_pc_thunk_name (name, REGNO (dest));
5534 pic_labels_used |= 1 << REGNO (dest);
5536 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5537 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5538 output_asm_insn ("call\t%X2", xops);
5539 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5540 is what will be referenced by the Mach-O PIC subsystem. */
5543 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5545 targetm.asm_out.internal_label (asm_out_file, "L",
5546 CODE_LABEL_NUMBER (label));
5553 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5554 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5556 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5561 /* Generate an "push" pattern for input ARG. */
5566 return gen_rtx_SET (VOIDmode,
5568 gen_rtx_PRE_DEC (Pmode,
5569 stack_pointer_rtx)),
5573 /* Return >= 0 if there is an unused call-clobbered register available
5574 for the entire function. */
5577 ix86_select_alt_pic_regnum (void)
5579 if (current_function_is_leaf && !current_function_profile
5580 && !ix86_current_function_calls_tls_descriptor)
5583 for (i = 2; i >= 0; --i)
5584 if (!df_regs_ever_live_p (i))
5588 return INVALID_REGNUM;
5591 /* Return 1 if we need to save REGNO. */
5593 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5595 if (pic_offset_table_rtx
5596 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5597 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5598 || current_function_profile
5599 || current_function_calls_eh_return
5600 || current_function_uses_const_pool))
5602 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5607 if (current_function_calls_eh_return && maybe_eh_return)
5612 unsigned test = EH_RETURN_DATA_REGNO (i);
5613 if (test == INVALID_REGNUM)
5620 if (cfun->machine->force_align_arg_pointer
5621 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5624 return (df_regs_ever_live_p (regno)
5625 && !call_used_regs[regno]
5626 && !fixed_regs[regno]
5627 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5630 /* Return number of registers to be saved on the stack. */
5633 ix86_nsaved_regs (void)
5638 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5639 if (ix86_save_reg (regno, true))
5644 /* Return the offset between two registers, one to be eliminated, and the other
5645 its replacement, at the start of a routine. */
5648 ix86_initial_elimination_offset (int from, int to)
5650 struct ix86_frame frame;
5651 ix86_compute_frame_layout (&frame);
5653 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5654 return frame.hard_frame_pointer_offset;
5655 else if (from == FRAME_POINTER_REGNUM
5656 && to == HARD_FRAME_POINTER_REGNUM)
5657 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5660 gcc_assert (to == STACK_POINTER_REGNUM);
5662 if (from == ARG_POINTER_REGNUM)
5663 return frame.stack_pointer_offset;
5665 gcc_assert (from == FRAME_POINTER_REGNUM);
5666 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5670 /* Fill structure ix86_frame about frame of currently computed function. */
5673 ix86_compute_frame_layout (struct ix86_frame *frame)
5675 HOST_WIDE_INT total_size;
5676 unsigned int stack_alignment_needed;
5677 HOST_WIDE_INT offset;
5678 unsigned int preferred_alignment;
5679 HOST_WIDE_INT size = get_frame_size ();
5681 frame->nregs = ix86_nsaved_regs ();
5684 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5685 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5687 /* During reload iteration the amount of registers saved can change.
5688 Recompute the value as needed. Do not recompute when amount of registers
5689 didn't change as reload does multiple calls to the function and does not
5690 expect the decision to change within single iteration. */
5692 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5694 int count = frame->nregs;
5696 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5697 /* The fast prologue uses move instead of push to save registers. This
5698 is significantly longer, but also executes faster as modern hardware
5699 can execute the moves in parallel, but can't do that for push/pop.
5701 Be careful about choosing what prologue to emit: When function takes
5702 many instructions to execute we may use slow version as well as in
5703 case function is known to be outside hot spot (this is known with
5704 feedback only). Weight the size of function by number of registers
5705 to save as it is cheap to use one or two push instructions but very
5706 slow to use many of them. */
5708 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5709 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5710 || (flag_branch_probabilities
5711 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5712 cfun->machine->use_fast_prologue_epilogue = false;
5714 cfun->machine->use_fast_prologue_epilogue
5715 = !expensive_function_p (count);
5717 if (TARGET_PROLOGUE_USING_MOVE
5718 && cfun->machine->use_fast_prologue_epilogue)
5719 frame->save_regs_using_mov = true;
5721 frame->save_regs_using_mov = false;
5724 /* Skip return address and saved base pointer. */
5725 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5727 frame->hard_frame_pointer_offset = offset;
5729 /* Do some sanity checking of stack_alignment_needed and
5730 preferred_alignment, since i386 port is the only using those features
5731 that may break easily. */
5733 gcc_assert (!size || stack_alignment_needed);
5734 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5735 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5736 gcc_assert (stack_alignment_needed
5737 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5739 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5740 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5742 /* Register save area */
5743 offset += frame->nregs * UNITS_PER_WORD;
5746 if (ix86_save_varrargs_registers)
5748 offset += X86_64_VARARGS_SIZE;
5749 frame->va_arg_size = X86_64_VARARGS_SIZE;
5752 frame->va_arg_size = 0;
5754 /* Align start of frame for local function. */
5755 frame->padding1 = ((offset + stack_alignment_needed - 1)
5756 & -stack_alignment_needed) - offset;
5758 offset += frame->padding1;
5760 /* Frame pointer points here. */
5761 frame->frame_pointer_offset = offset;
5765 /* Add outgoing arguments area. Can be skipped if we eliminated
5766 all the function calls as dead code.
5767 Skipping is however impossible when function calls alloca. Alloca
5768 expander assumes that last current_function_outgoing_args_size
5769 of stack frame are unused. */
5770 if (ACCUMULATE_OUTGOING_ARGS
5771 && (!current_function_is_leaf || current_function_calls_alloca
5772 || ix86_current_function_calls_tls_descriptor))
5774 offset += current_function_outgoing_args_size;
5775 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5778 frame->outgoing_arguments_size = 0;
5780 /* Align stack boundary. Only needed if we're calling another function
5782 if (!current_function_is_leaf || current_function_calls_alloca
5783 || ix86_current_function_calls_tls_descriptor)
5784 frame->padding2 = ((offset + preferred_alignment - 1)
5785 & -preferred_alignment) - offset;
5787 frame->padding2 = 0;
5789 offset += frame->padding2;
5791 /* We've reached end of stack frame. */
5792 frame->stack_pointer_offset = offset;
5794 /* Size prologue needs to allocate. */
5795 frame->to_allocate =
5796 (size + frame->padding1 + frame->padding2
5797 + frame->outgoing_arguments_size + frame->va_arg_size);
5799 if ((!frame->to_allocate && frame->nregs <= 1)
5800 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5801 frame->save_regs_using_mov = false;
5803 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5804 && current_function_is_leaf
5805 && !ix86_current_function_calls_tls_descriptor)
5807 frame->red_zone_size = frame->to_allocate;
5808 if (frame->save_regs_using_mov)
5809 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5810 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5811 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5814 frame->red_zone_size = 0;
5815 frame->to_allocate -= frame->red_zone_size;
5816 frame->stack_pointer_offset -= frame->red_zone_size;
5818 fprintf (stderr, "\n");
5819 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5820 fprintf (stderr, "size: %ld\n", (long)size);
5821 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5822 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5823 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5824 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5825 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5826 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5827 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5828 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5829 (long)frame->hard_frame_pointer_offset);
5830 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5831 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5832 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5833 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5837 /* Emit code to save registers in the prologue. */
5840 ix86_emit_save_regs (void)
5845 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5846 if (ix86_save_reg (regno, true))
5848 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5849 RTX_FRAME_RELATED_P (insn) = 1;
5853 /* Emit code to save registers using MOV insns. First register
5854 is restored from POINTER + OFFSET. */
5856 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5861 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5862 if (ix86_save_reg (regno, true))
5864 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5866 gen_rtx_REG (Pmode, regno));
5867 RTX_FRAME_RELATED_P (insn) = 1;
5868 offset += UNITS_PER_WORD;
5872 /* Expand prologue or epilogue stack adjustment.
5873 The pattern exist to put a dependency on all ebp-based memory accesses.
5874 STYLE should be negative if instructions should be marked as frame related,
5875 zero if %r11 register is live and cannot be freely used and positive
5879 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5884 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5885 else if (x86_64_immediate_operand (offset, DImode))
5886 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5890 /* r11 is used by indirect sibcall return as well, set before the
5891 epilogue and used after the epilogue. ATM indirect sibcall
5892 shouldn't be used together with huge frame sizes in one
5893 function because of the frame_size check in sibcall.c. */
5895 r11 = gen_rtx_REG (DImode, R11_REG);
5896 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5898 RTX_FRAME_RELATED_P (insn) = 1;
5899 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5903 RTX_FRAME_RELATED_P (insn) = 1;
5906 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5909 ix86_internal_arg_pointer (void)
5911 bool has_force_align_arg_pointer =
5912 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5913 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5914 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5915 && DECL_NAME (current_function_decl)
5916 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5917 && DECL_FILE_SCOPE_P (current_function_decl))
5918 || ix86_force_align_arg_pointer
5919 || has_force_align_arg_pointer)
5921 /* Nested functions can't realign the stack due to a register
5923 if (DECL_CONTEXT (current_function_decl)
5924 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5926 if (ix86_force_align_arg_pointer)
5927 warning (0, "-mstackrealign ignored for nested functions");
5928 if (has_force_align_arg_pointer)
5929 error ("%s not supported for nested functions",
5930 ix86_force_align_arg_pointer_string);
5931 return virtual_incoming_args_rtx;
5933 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5934 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5937 return virtual_incoming_args_rtx;
5940 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5941 This is called from dwarf2out.c to emit call frame instructions
5942 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5944 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5946 rtx unspec = SET_SRC (pattern);
5947 gcc_assert (GET_CODE (unspec) == UNSPEC);
5951 case UNSPEC_REG_SAVE:
5952 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5953 SET_DEST (pattern));
5955 case UNSPEC_DEF_CFA:
5956 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5957 INTVAL (XVECEXP (unspec, 0, 0)));
5964 /* Expand the prologue into a bunch of separate insns. */
5967 ix86_expand_prologue (void)
5971 struct ix86_frame frame;
5972 HOST_WIDE_INT allocate;
5974 ix86_compute_frame_layout (&frame);
5976 if (cfun->machine->force_align_arg_pointer)
5980 /* Grab the argument pointer. */
5981 x = plus_constant (stack_pointer_rtx, 4);
5982 y = cfun->machine->force_align_arg_pointer;
5983 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5984 RTX_FRAME_RELATED_P (insn) = 1;
5986 /* The unwind info consists of two parts: install the fafp as the cfa,
5987 and record the fafp as the "save register" of the stack pointer.
5988 The later is there in order that the unwinder can see where it
5989 should restore the stack pointer across the and insn. */
5990 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5991 x = gen_rtx_SET (VOIDmode, y, x);
5992 RTX_FRAME_RELATED_P (x) = 1;
5993 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5995 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5996 RTX_FRAME_RELATED_P (y) = 1;
5997 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5998 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5999 REG_NOTES (insn) = x;
6001 /* Align the stack. */
6002 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6005 /* And here we cheat like madmen with the unwind info. We force the
6006 cfa register back to sp+4, which is exactly what it was at the
6007 start of the function. Re-pushing the return address results in
6008 the return at the same spot relative to the cfa, and thus is
6009 correct wrt the unwind info. */
6010 x = cfun->machine->force_align_arg_pointer;
6011 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6012 insn = emit_insn (gen_push (x));
6013 RTX_FRAME_RELATED_P (insn) = 1;
6016 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6017 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6018 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6019 REG_NOTES (insn) = x;
6022 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6023 slower on all targets. Also sdb doesn't like it. */
6025 if (frame_pointer_needed)
6027 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6028 RTX_FRAME_RELATED_P (insn) = 1;
6030 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6031 RTX_FRAME_RELATED_P (insn) = 1;
6034 allocate = frame.to_allocate;
6036 if (!frame.save_regs_using_mov)
6037 ix86_emit_save_regs ();
6039 allocate += frame.nregs * UNITS_PER_WORD;
6041 /* When using red zone we may start register saving before allocating
6042 the stack frame saving one cycle of the prologue. */
6043 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6044 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6045 : stack_pointer_rtx,
6046 -frame.nregs * UNITS_PER_WORD);
6050 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6051 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6052 GEN_INT (-allocate), -1);
6055 /* Only valid for Win32. */
6056 rtx eax = gen_rtx_REG (Pmode, 0);
6060 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6062 if (TARGET_64BIT_MS_ABI)
6065 eax_live = ix86_eax_live_at_start_p ();
6069 emit_insn (gen_push (eax));
6070 allocate -= UNITS_PER_WORD;
6073 emit_move_insn (eax, GEN_INT (allocate));
6076 insn = gen_allocate_stack_worker_64 (eax);
6078 insn = gen_allocate_stack_worker_32 (eax);
6079 insn = emit_insn (insn);
6080 RTX_FRAME_RELATED_P (insn) = 1;
6081 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6082 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6083 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6084 t, REG_NOTES (insn));
6088 if (frame_pointer_needed)
6089 t = plus_constant (hard_frame_pointer_rtx,
6092 - frame.nregs * UNITS_PER_WORD);
6094 t = plus_constant (stack_pointer_rtx, allocate);
6095 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6099 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6101 if (!frame_pointer_needed || !frame.to_allocate)
6102 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6104 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6105 -frame.nregs * UNITS_PER_WORD);
6108 pic_reg_used = false;
6109 if (pic_offset_table_rtx
6110 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6111 || current_function_profile))
6113 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6115 if (alt_pic_reg_used != INVALID_REGNUM)
6116 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6118 pic_reg_used = true;
6125 if (ix86_cmodel == CM_LARGE_PIC)
6127 rtx tmp_reg = gen_rtx_REG (DImode,
6128 FIRST_REX_INT_REG + 3 /* R11 */);
6129 rtx label = gen_label_rtx ();
6131 LABEL_PRESERVE_P (label) = 1;
6132 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6133 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6134 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6135 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6136 pic_offset_table_rtx, tmp_reg));
6139 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6142 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6145 /* Prevent function calls from be scheduled before the call to mcount.
6146 In the pic_reg_used case, make sure that the got load isn't deleted. */
6147 if (current_function_profile)
6150 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6151 emit_insn (gen_blockage ());
6155 /* Emit code to restore saved registers using MOV insns. First register
6156 is restored from POINTER + OFFSET. */
6158 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6159 int maybe_eh_return)
6162 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6164 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6165 if (ix86_save_reg (regno, maybe_eh_return))
6167 /* Ensure that adjust_address won't be forced to produce pointer
6168 out of range allowed by x86-64 instruction set. */
6169 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6173 r11 = gen_rtx_REG (DImode, R11_REG);
6174 emit_move_insn (r11, GEN_INT (offset));
6175 emit_insn (gen_adddi3 (r11, r11, pointer));
6176 base_address = gen_rtx_MEM (Pmode, r11);
6179 emit_move_insn (gen_rtx_REG (Pmode, regno),
6180 adjust_address (base_address, Pmode, offset));
6181 offset += UNITS_PER_WORD;
6185 /* Restore function stack, frame, and registers. */
6188 ix86_expand_epilogue (int style)
6191 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6192 struct ix86_frame frame;
6193 HOST_WIDE_INT offset;
6195 ix86_compute_frame_layout (&frame);
6197 /* Calculate start of saved registers relative to ebp. Special care
6198 must be taken for the normal return case of a function using
6199 eh_return: the eax and edx registers are marked as saved, but not
6200 restored along this path. */
6201 offset = frame.nregs;
6202 if (current_function_calls_eh_return && style != 2)
6204 offset *= -UNITS_PER_WORD;
6206 /* If we're only restoring one register and sp is not valid then
6207 using a move instruction to restore the register since it's
6208 less work than reloading sp and popping the register.
6210 The default code result in stack adjustment using add/lea instruction,
6211 while this code results in LEAVE instruction (or discrete equivalent),
6212 so it is profitable in some other cases as well. Especially when there
6213 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6214 and there is exactly one register to pop. This heuristic may need some
6215 tuning in future. */
6216 if ((!sp_valid && frame.nregs <= 1)
6217 || (TARGET_EPILOGUE_USING_MOVE
6218 && cfun->machine->use_fast_prologue_epilogue
6219 && (frame.nregs > 1 || frame.to_allocate))
6220 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6221 || (frame_pointer_needed && TARGET_USE_LEAVE
6222 && cfun->machine->use_fast_prologue_epilogue
6223 && frame.nregs == 1)
6224 || current_function_calls_eh_return)
6226 /* Restore registers. We can use ebp or esp to address the memory
6227 locations. If both are available, default to ebp, since offsets
6228 are known to be small. Only exception is esp pointing directly to the
6229 end of block of saved registers, where we may simplify addressing
6232 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6233 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6234 frame.to_allocate, style == 2);
6236 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6237 offset, style == 2);
6239 /* eh_return epilogues need %ecx added to the stack pointer. */
6242 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6244 if (frame_pointer_needed)
6246 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6247 tmp = plus_constant (tmp, UNITS_PER_WORD);
6248 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6250 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6251 emit_move_insn (hard_frame_pointer_rtx, tmp);
6253 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6258 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6259 tmp = plus_constant (tmp, (frame.to_allocate
6260 + frame.nregs * UNITS_PER_WORD));
6261 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6264 else if (!frame_pointer_needed)
6265 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6266 GEN_INT (frame.to_allocate
6267 + frame.nregs * UNITS_PER_WORD),
6269 /* If not an i386, mov & pop is faster than "leave". */
6270 else if (TARGET_USE_LEAVE || optimize_size
6271 || !cfun->machine->use_fast_prologue_epilogue)
6272 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6275 pro_epilogue_adjust_stack (stack_pointer_rtx,
6276 hard_frame_pointer_rtx,
6279 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6281 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6286 /* First step is to deallocate the stack frame so that we can
6287 pop the registers. */
6290 gcc_assert (frame_pointer_needed);
6291 pro_epilogue_adjust_stack (stack_pointer_rtx,
6292 hard_frame_pointer_rtx,
6293 GEN_INT (offset), style);
6295 else if (frame.to_allocate)
6296 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6297 GEN_INT (frame.to_allocate), style);
6299 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6300 if (ix86_save_reg (regno, false))
6303 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6305 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6307 if (frame_pointer_needed)
6309 /* Leave results in shorter dependency chains on CPUs that are
6310 able to grok it fast. */
6311 if (TARGET_USE_LEAVE)
6312 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6313 else if (TARGET_64BIT)
6314 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6316 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6320 if (cfun->machine->force_align_arg_pointer)
6322 emit_insn (gen_addsi3 (stack_pointer_rtx,
6323 cfun->machine->force_align_arg_pointer,
6327 /* Sibcall epilogues don't want a return instruction. */
6331 if (current_function_pops_args && current_function_args_size)
6333 rtx popc = GEN_INT (current_function_pops_args);
6335 /* i386 can only pop 64K bytes. If asked to pop more, pop
6336 return address, do explicit add, and jump indirectly to the
6339 if (current_function_pops_args >= 65536)
6341 rtx ecx = gen_rtx_REG (SImode, 2);
6343 /* There is no "pascal" calling convention in any 64bit ABI. */
6344 gcc_assert (!TARGET_64BIT);
6346 emit_insn (gen_popsi1 (ecx));
6347 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6348 emit_jump_insn (gen_return_indirect_internal (ecx));
6351 emit_jump_insn (gen_return_pop_internal (popc));
6354 emit_jump_insn (gen_return_internal ());
6357 /* Reset from the function's potential modifications. */
6360 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6361 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6363 if (pic_offset_table_rtx)
6364 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6366 /* Mach-O doesn't support labels at the end of objects, so if
6367 it looks like we might want one, insert a NOP. */
6369 rtx insn = get_last_insn ();
6372 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6373 insn = PREV_INSN (insn);
6377 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6378 fputs ("\tnop\n", file);
6384 /* Extract the parts of an RTL expression that is a valid memory address
6385 for an instruction. Return 0 if the structure of the address is
6386 grossly off. Return -1 if the address contains ASHIFT, so it is not
6387 strictly valid, but still used for computing length of lea instruction. */
6390 ix86_decompose_address (rtx addr, struct ix86_address *out)
6392 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6393 rtx base_reg, index_reg;
6394 HOST_WIDE_INT scale = 1;
6395 rtx scale_rtx = NULL_RTX;
6397 enum ix86_address_seg seg = SEG_DEFAULT;
6399 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6401 else if (GET_CODE (addr) == PLUS)
6411 addends[n++] = XEXP (op, 1);
6414 while (GET_CODE (op) == PLUS);
6419 for (i = n; i >= 0; --i)
6422 switch (GET_CODE (op))
6427 index = XEXP (op, 0);
6428 scale_rtx = XEXP (op, 1);
6432 if (XINT (op, 1) == UNSPEC_TP
6433 && TARGET_TLS_DIRECT_SEG_REFS
6434 && seg == SEG_DEFAULT)
6435 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6464 else if (GET_CODE (addr) == MULT)
6466 index = XEXP (addr, 0); /* index*scale */
6467 scale_rtx = XEXP (addr, 1);
6469 else if (GET_CODE (addr) == ASHIFT)
6473 /* We're called for lea too, which implements ashift on occasion. */
6474 index = XEXP (addr, 0);
6475 tmp = XEXP (addr, 1);
6476 if (!CONST_INT_P (tmp))
6478 scale = INTVAL (tmp);
6479 if ((unsigned HOST_WIDE_INT) scale > 3)
6485 disp = addr; /* displacement */
6487 /* Extract the integral value of scale. */
6490 if (!CONST_INT_P (scale_rtx))
6492 scale = INTVAL (scale_rtx);
6495 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6496 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6498 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6499 if (base_reg && index_reg && scale == 1
6500 && (index_reg == arg_pointer_rtx
6501 || index_reg == frame_pointer_rtx
6502 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6505 tmp = base, base = index, index = tmp;
6506 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6509 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6510 if ((base_reg == hard_frame_pointer_rtx
6511 || base_reg == frame_pointer_rtx
6512 || base_reg == arg_pointer_rtx) && !disp)
6515 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6516 Avoid this by transforming to [%esi+0]. */
6517 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6518 && base_reg && !index_reg && !disp
6520 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6523 /* Special case: encode reg+reg instead of reg*2. */
6524 if (!base && index && scale && scale == 2)
6525 base = index, base_reg = index_reg, scale = 1;
6527 /* Special case: scaling cannot be encoded without base or displacement. */
6528 if (!base && !disp && index && scale != 1)
6540 /* Return cost of the memory address x.
6541 For i386, it is better to use a complex address than let gcc copy
6542 the address into a reg and make a new pseudo. But not if the address
6543 requires to two regs - that would mean more pseudos with longer
6546 ix86_address_cost (rtx x)
6548 struct ix86_address parts;
6550 int ok = ix86_decompose_address (x, &parts);
6554 if (parts.base && GET_CODE (parts.base) == SUBREG)
6555 parts.base = SUBREG_REG (parts.base);
6556 if (parts.index && GET_CODE (parts.index) == SUBREG)
6557 parts.index = SUBREG_REG (parts.index);
6559 /* More complex memory references are better. */
6560 if (parts.disp && parts.disp != const0_rtx)
6562 if (parts.seg != SEG_DEFAULT)
6565 /* Attempt to minimize number of registers in the address. */
6567 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6569 && (!REG_P (parts.index)
6570 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6574 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6576 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6577 && parts.base != parts.index)
6580 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6581 since it's predecode logic can't detect the length of instructions
6582 and it degenerates to vector decoded. Increase cost of such
6583 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6584 to split such addresses or even refuse such addresses at all.
6586 Following addressing modes are affected:
6591 The first and last case may be avoidable by explicitly coding the zero in
6592 memory address, but I don't have AMD-K6 machine handy to check this
6596 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6597 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6598 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6604 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6605 this is used for to form addresses to local data when -fPIC is in
6609 darwin_local_data_pic (rtx disp)
6611 if (GET_CODE (disp) == MINUS)
6613 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6614 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6615 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6617 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6618 if (! strcmp (sym_name, "<pic base>"))
6626 /* Determine if a given RTX is a valid constant. We already know this
6627 satisfies CONSTANT_P. */
6630 legitimate_constant_p (rtx x)
6632 switch (GET_CODE (x))
6637 if (GET_CODE (x) == PLUS)
6639 if (!CONST_INT_P (XEXP (x, 1)))
6644 if (TARGET_MACHO && darwin_local_data_pic (x))
6647 /* Only some unspecs are valid as "constants". */
6648 if (GET_CODE (x) == UNSPEC)
6649 switch (XINT (x, 1))
6654 return TARGET_64BIT;
6657 x = XVECEXP (x, 0, 0);
6658 return (GET_CODE (x) == SYMBOL_REF
6659 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6661 x = XVECEXP (x, 0, 0);
6662 return (GET_CODE (x) == SYMBOL_REF
6663 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6668 /* We must have drilled down to a symbol. */
6669 if (GET_CODE (x) == LABEL_REF)
6671 if (GET_CODE (x) != SYMBOL_REF)
6676 /* TLS symbols are never valid. */
6677 if (SYMBOL_REF_TLS_MODEL (x))
6680 /* DLLIMPORT symbols are never valid. */
6681 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6682 && SYMBOL_REF_DLLIMPORT_P (x))
6687 if (GET_MODE (x) == TImode
6688 && x != CONST0_RTX (TImode)
6694 if (x == CONST0_RTX (GET_MODE (x)))
6702 /* Otherwise we handle everything else in the move patterns. */
6706 /* Determine if it's legal to put X into the constant pool. This
6707 is not possible for the address of thread-local symbols, which
6708 is checked above. */
6711 ix86_cannot_force_const_mem (rtx x)
6713 /* We can always put integral constants and vectors in memory. */
6714 switch (GET_CODE (x))
6724 return !legitimate_constant_p (x);
6727 /* Determine if a given RTX is a valid constant address. */
6730 constant_address_p (rtx x)
6732 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6735 /* Nonzero if the constant value X is a legitimate general operand
6736 when generating PIC code. It is given that flag_pic is on and
6737 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6740 legitimate_pic_operand_p (rtx x)
6744 switch (GET_CODE (x))
6747 inner = XEXP (x, 0);
6748 if (GET_CODE (inner) == PLUS
6749 && CONST_INT_P (XEXP (inner, 1)))
6750 inner = XEXP (inner, 0);
6752 /* Only some unspecs are valid as "constants". */
6753 if (GET_CODE (inner) == UNSPEC)
6754 switch (XINT (inner, 1))
6759 return TARGET_64BIT;
6761 x = XVECEXP (inner, 0, 0);
6762 return (GET_CODE (x) == SYMBOL_REF
6763 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6771 return legitimate_pic_address_disp_p (x);
6778 /* Determine if a given CONST RTX is a valid memory displacement
6782 legitimate_pic_address_disp_p (rtx disp)
6786 /* In 64bit mode we can allow direct addresses of symbols and labels
6787 when they are not dynamic symbols. */
6790 rtx op0 = disp, op1;
6792 switch (GET_CODE (disp))
6798 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6800 op0 = XEXP (XEXP (disp, 0), 0);
6801 op1 = XEXP (XEXP (disp, 0), 1);
6802 if (!CONST_INT_P (op1)
6803 || INTVAL (op1) >= 16*1024*1024
6804 || INTVAL (op1) < -16*1024*1024)
6806 if (GET_CODE (op0) == LABEL_REF)
6808 if (GET_CODE (op0) != SYMBOL_REF)
6813 /* TLS references should always be enclosed in UNSPEC. */
6814 if (SYMBOL_REF_TLS_MODEL (op0))
6816 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6817 && ix86_cmodel != CM_LARGE_PIC)
6825 if (GET_CODE (disp) != CONST)
6827 disp = XEXP (disp, 0);
6831 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6832 of GOT tables. We should not need these anyway. */
6833 if (GET_CODE (disp) != UNSPEC
6834 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6835 && XINT (disp, 1) != UNSPEC_GOTOFF
6836 && XINT (disp, 1) != UNSPEC_PLTOFF))
6839 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6840 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6846 if (GET_CODE (disp) == PLUS)
6848 if (!CONST_INT_P (XEXP (disp, 1)))
6850 disp = XEXP (disp, 0);
6854 if (TARGET_MACHO && darwin_local_data_pic (disp))
6857 if (GET_CODE (disp) != UNSPEC)
6860 switch (XINT (disp, 1))
6865 /* We need to check for both symbols and labels because VxWorks loads
6866 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6868 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6869 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6871 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6872 While ABI specify also 32bit relocation but we don't produce it in
6873 small PIC model at all. */
6874 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6875 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6877 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6879 case UNSPEC_GOTTPOFF:
6880 case UNSPEC_GOTNTPOFF:
6881 case UNSPEC_INDNTPOFF:
6884 disp = XVECEXP (disp, 0, 0);
6885 return (GET_CODE (disp) == SYMBOL_REF
6886 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6888 disp = XVECEXP (disp, 0, 0);
6889 return (GET_CODE (disp) == SYMBOL_REF
6890 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6892 disp = XVECEXP (disp, 0, 0);
6893 return (GET_CODE (disp) == SYMBOL_REF
6894 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6900 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6901 memory address for an instruction. The MODE argument is the machine mode
6902 for the MEM expression that wants to use this address.
6904 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6905 convert common non-canonical forms to canonical form so that they will
6909 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6910 rtx addr, int strict)
6912 struct ix86_address parts;
6913 rtx base, index, disp;
6914 HOST_WIDE_INT scale;
6915 const char *reason = NULL;
6916 rtx reason_rtx = NULL_RTX;
6918 if (ix86_decompose_address (addr, &parts) <= 0)
6920 reason = "decomposition failed";
6925 index = parts.index;
6927 scale = parts.scale;
6929 /* Validate base register.
6931 Don't allow SUBREG's that span more than a word here. It can lead to spill
6932 failures when the base is one word out of a two word structure, which is
6933 represented internally as a DImode int. */
6942 else if (GET_CODE (base) == SUBREG
6943 && REG_P (SUBREG_REG (base))
6944 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6946 reg = SUBREG_REG (base);
6949 reason = "base is not a register";
6953 if (GET_MODE (base) != Pmode)
6955 reason = "base is not in Pmode";
6959 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6960 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6962 reason = "base is not valid";
6967 /* Validate index register.
6969 Don't allow SUBREG's that span more than a word here -- same as above. */
6978 else if (GET_CODE (index) == SUBREG
6979 && REG_P (SUBREG_REG (index))
6980 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6982 reg = SUBREG_REG (index);
6985 reason = "index is not a register";
6989 if (GET_MODE (index) != Pmode)
6991 reason = "index is not in Pmode";
6995 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6996 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6998 reason = "index is not valid";
7003 /* Validate scale factor. */
7006 reason_rtx = GEN_INT (scale);
7009 reason = "scale without index";
7013 if (scale != 2 && scale != 4 && scale != 8)
7015 reason = "scale is not a valid multiplier";
7020 /* Validate displacement. */
7025 if (GET_CODE (disp) == CONST
7026 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7027 switch (XINT (XEXP (disp, 0), 1))
7029 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7030 used. While ABI specify also 32bit relocations, we don't produce
7031 them at all and use IP relative instead. */
7034 gcc_assert (flag_pic);
7036 goto is_legitimate_pic;
7037 reason = "64bit address unspec";
7040 case UNSPEC_GOTPCREL:
7041 gcc_assert (flag_pic);
7042 goto is_legitimate_pic;
7044 case UNSPEC_GOTTPOFF:
7045 case UNSPEC_GOTNTPOFF:
7046 case UNSPEC_INDNTPOFF:
7052 reason = "invalid address unspec";
7056 else if (SYMBOLIC_CONST (disp)
7060 && MACHOPIC_INDIRECT
7061 && !machopic_operand_p (disp)
7067 if (TARGET_64BIT && (index || base))
7069 /* foo@dtpoff(%rX) is ok. */
7070 if (GET_CODE (disp) != CONST
7071 || GET_CODE (XEXP (disp, 0)) != PLUS
7072 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7073 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7074 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7075 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7077 reason = "non-constant pic memory reference";
7081 else if (! legitimate_pic_address_disp_p (disp))
7083 reason = "displacement is an invalid pic construct";
7087 /* This code used to verify that a symbolic pic displacement
7088 includes the pic_offset_table_rtx register.
7090 While this is good idea, unfortunately these constructs may
7091 be created by "adds using lea" optimization for incorrect
7100 This code is nonsensical, but results in addressing
7101 GOT table with pic_offset_table_rtx base. We can't
7102 just refuse it easily, since it gets matched by
7103 "addsi3" pattern, that later gets split to lea in the
7104 case output register differs from input. While this
7105 can be handled by separate addsi pattern for this case
7106 that never results in lea, this seems to be easier and
7107 correct fix for crash to disable this test. */
7109 else if (GET_CODE (disp) != LABEL_REF
7110 && !CONST_INT_P (disp)
7111 && (GET_CODE (disp) != CONST
7112 || !legitimate_constant_p (disp))
7113 && (GET_CODE (disp) != SYMBOL_REF
7114 || !legitimate_constant_p (disp)))
7116 reason = "displacement is not constant";
7119 else if (TARGET_64BIT
7120 && !x86_64_immediate_operand (disp, VOIDmode))
7122 reason = "displacement is out of range";
7127 /* Everything looks valid. */
7134 /* Return a unique alias set for the GOT. */
7136 static HOST_WIDE_INT
7137 ix86_GOT_alias_set (void)
7139 static HOST_WIDE_INT set = -1;
7141 set = new_alias_set ();
7145 /* Return a legitimate reference for ORIG (an address) using the
7146 register REG. If REG is 0, a new pseudo is generated.
7148 There are two types of references that must be handled:
7150 1. Global data references must load the address from the GOT, via
7151 the PIC reg. An insn is emitted to do this load, and the reg is
7154 2. Static data references, constant pool addresses, and code labels
7155 compute the address as an offset from the GOT, whose base is in
7156 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7157 differentiate them from global data objects. The returned
7158 address is the PIC reg + an unspec constant.
7160 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7161 reg also appears in the address. */
7164 legitimize_pic_address (rtx orig, rtx reg)
7171 if (TARGET_MACHO && !TARGET_64BIT)
7174 reg = gen_reg_rtx (Pmode);
7175 /* Use the generic Mach-O PIC machinery. */
7176 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7180 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7182 else if (TARGET_64BIT
7183 && ix86_cmodel != CM_SMALL_PIC
7184 && gotoff_operand (addr, Pmode))
7187 /* This symbol may be referenced via a displacement from the PIC
7188 base address (@GOTOFF). */
7190 if (reload_in_progress)
7191 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7192 if (GET_CODE (addr) == CONST)
7193 addr = XEXP (addr, 0);
7194 if (GET_CODE (addr) == PLUS)
7196 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7198 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7201 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7202 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7204 tmpreg = gen_reg_rtx (Pmode);
7207 emit_move_insn (tmpreg, new_rtx);
7211 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7212 tmpreg, 1, OPTAB_DIRECT);
7215 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7217 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7219 /* This symbol may be referenced via a displacement from the PIC
7220 base address (@GOTOFF). */
7222 if (reload_in_progress)
7223 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7224 if (GET_CODE (addr) == CONST)
7225 addr = XEXP (addr, 0);
7226 if (GET_CODE (addr) == PLUS)
7228 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7230 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7233 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7234 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7235 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7239 emit_move_insn (reg, new_rtx);
7243 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7244 /* We can't use @GOTOFF for text labels on VxWorks;
7245 see gotoff_operand. */
7246 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7248 /* Given that we've already handled dllimport variables separately
7249 in legitimize_address, and all other variables should satisfy
7250 legitimate_pic_address_disp_p, we should never arrive here. */
7251 gcc_assert (!TARGET_64BIT_MS_ABI);
7253 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7255 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7256 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7257 new_rtx = gen_const_mem (Pmode, new_rtx);
7258 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7261 reg = gen_reg_rtx (Pmode);
7262 /* Use directly gen_movsi, otherwise the address is loaded
7263 into register for CSE. We don't want to CSE this addresses,
7264 instead we CSE addresses from the GOT table, so skip this. */
7265 emit_insn (gen_movsi (reg, new_rtx));
7270 /* This symbol must be referenced via a load from the
7271 Global Offset Table (@GOT). */
7273 if (reload_in_progress)
7274 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7275 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7276 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7278 new_rtx = force_reg (Pmode, new_rtx);
7279 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7280 new_rtx = gen_const_mem (Pmode, new_rtx);
7281 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7284 reg = gen_reg_rtx (Pmode);
7285 emit_move_insn (reg, new_rtx);
7291 if (CONST_INT_P (addr)
7292 && !x86_64_immediate_operand (addr, VOIDmode))
7296 emit_move_insn (reg, addr);
7300 new_rtx = force_reg (Pmode, addr);
7302 else if (GET_CODE (addr) == CONST)
7304 addr = XEXP (addr, 0);
7306 /* We must match stuff we generate before. Assume the only
7307 unspecs that can get here are ours. Not that we could do
7308 anything with them anyway.... */
7309 if (GET_CODE (addr) == UNSPEC
7310 || (GET_CODE (addr) == PLUS
7311 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7313 gcc_assert (GET_CODE (addr) == PLUS);
7315 if (GET_CODE (addr) == PLUS)
7317 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7319 /* Check first to see if this is a constant offset from a @GOTOFF
7320 symbol reference. */
7321 if (gotoff_operand (op0, Pmode)
7322 && CONST_INT_P (op1))
7326 if (reload_in_progress)
7327 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7328 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7330 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7331 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7332 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7336 emit_move_insn (reg, new_rtx);
7342 if (INTVAL (op1) < -16*1024*1024
7343 || INTVAL (op1) >= 16*1024*1024)
7345 if (!x86_64_immediate_operand (op1, Pmode))
7346 op1 = force_reg (Pmode, op1);
7347 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7353 base = legitimize_pic_address (XEXP (addr, 0), reg);
7354 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7355 base == reg ? NULL_RTX : reg);
7357 if (CONST_INT_P (new_rtx))
7358 new_rtx = plus_constant (base, INTVAL (new_rtx));
7361 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7363 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7364 new_rtx = XEXP (new_rtx, 1);
7366 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7374 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7377 get_thread_pointer (int to_reg)
7381 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7385 reg = gen_reg_rtx (Pmode);
7386 insn = gen_rtx_SET (VOIDmode, reg, tp);
7387 insn = emit_insn (insn);
7392 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7393 false if we expect this to be used for a memory address and true if
7394 we expect to load the address into a register. */
7397 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7399 rtx dest, base, off, pic, tp;
7404 case TLS_MODEL_GLOBAL_DYNAMIC:
7405 dest = gen_reg_rtx (Pmode);
7406 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7408 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7410 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7413 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7414 insns = get_insns ();
7417 CONST_OR_PURE_CALL_P (insns) = 1;
7418 emit_libcall_block (insns, dest, rax, x);
7420 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7421 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7423 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7425 if (TARGET_GNU2_TLS)
7427 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7429 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7433 case TLS_MODEL_LOCAL_DYNAMIC:
7434 base = gen_reg_rtx (Pmode);
7435 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7437 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7439 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7442 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7443 insns = get_insns ();
7446 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7447 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7448 CONST_OR_PURE_CALL_P (insns) = 1;
7449 emit_libcall_block (insns, base, rax, note);
7451 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7452 emit_insn (gen_tls_local_dynamic_base_64 (base));
7454 emit_insn (gen_tls_local_dynamic_base_32 (base));
7456 if (TARGET_GNU2_TLS)
7458 rtx x = ix86_tls_module_base ();
7460 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7461 gen_rtx_MINUS (Pmode, x, tp));
7464 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7465 off = gen_rtx_CONST (Pmode, off);
7467 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7469 if (TARGET_GNU2_TLS)
7471 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7473 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7478 case TLS_MODEL_INITIAL_EXEC:
7482 type = UNSPEC_GOTNTPOFF;
7486 if (reload_in_progress)
7487 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7488 pic = pic_offset_table_rtx;
7489 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7491 else if (!TARGET_ANY_GNU_TLS)
7493 pic = gen_reg_rtx (Pmode);
7494 emit_insn (gen_set_got (pic));
7495 type = UNSPEC_GOTTPOFF;
7500 type = UNSPEC_INDNTPOFF;
7503 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7504 off = gen_rtx_CONST (Pmode, off);
7506 off = gen_rtx_PLUS (Pmode, pic, off);
7507 off = gen_const_mem (Pmode, off);
7508 set_mem_alias_set (off, ix86_GOT_alias_set ());
7510 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7512 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7513 off = force_reg (Pmode, off);
7514 return gen_rtx_PLUS (Pmode, base, off);
7518 base = get_thread_pointer (true);
7519 dest = gen_reg_rtx (Pmode);
7520 emit_insn (gen_subsi3 (dest, base, off));
7524 case TLS_MODEL_LOCAL_EXEC:
7525 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7526 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7527 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7528 off = gen_rtx_CONST (Pmode, off);
7530 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7532 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7533 return gen_rtx_PLUS (Pmode, base, off);
7537 base = get_thread_pointer (true);
7538 dest = gen_reg_rtx (Pmode);
7539 emit_insn (gen_subsi3 (dest, base, off));
7550 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7553 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7554 htab_t dllimport_map;
7557 get_dllimport_decl (tree decl)
7559 struct tree_map *h, in;
7563 size_t namelen, prefixlen;
7569 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7571 in.hash = htab_hash_pointer (decl);
7572 in.base.from = decl;
7573 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7574 h = (struct tree_map *) *loc;
7578 *loc = h = GGC_NEW (struct tree_map);
7580 h->base.from = decl;
7581 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7582 DECL_ARTIFICIAL (to) = 1;
7583 DECL_IGNORED_P (to) = 1;
7584 DECL_EXTERNAL (to) = 1;
7585 TREE_READONLY (to) = 1;
7587 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7588 name = targetm.strip_name_encoding (name);
7589 if (name[0] == FASTCALL_PREFIX)
7595 prefix = "*__imp__";
7597 namelen = strlen (name);
7598 prefixlen = strlen (prefix);
7599 imp_name = (char *) alloca (namelen + prefixlen + 1);
7600 memcpy (imp_name, prefix, prefixlen);
7601 memcpy (imp_name + prefixlen, name, namelen + 1);
7603 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7604 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7605 SET_SYMBOL_REF_DECL (rtl, to);
7606 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7608 rtl = gen_const_mem (Pmode, rtl);
7609 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7611 SET_DECL_RTL (to, rtl);
7616 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7617 true if we require the result be a register. */
7620 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7625 gcc_assert (SYMBOL_REF_DECL (symbol));
7626 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7628 x = DECL_RTL (imp_decl);
7630 x = force_reg (Pmode, x);
7634 /* Try machine-dependent ways of modifying an illegitimate address
7635 to be legitimate. If we find one, return the new, valid address.
7636 This macro is used in only one place: `memory_address' in explow.c.
7638 OLDX is the address as it was before break_out_memory_refs was called.
7639 In some cases it is useful to look at this to decide what needs to be done.
7641 MODE and WIN are passed so that this macro can use
7642 GO_IF_LEGITIMATE_ADDRESS.
7644 It is always safe for this macro to do nothing. It exists to recognize
7645 opportunities to optimize the output.
7647 For the 80386, we handle X+REG by loading X into a register R and
7648 using R+REG. R will go in a general reg and indexing will be used.
7649 However, if REG is a broken-out memory address or multiplication,
7650 nothing needs to be done because REG can certainly go in a general reg.
7652 When -fpic is used, special handling is needed for symbolic references.
7653 See comments by legitimize_pic_address in i386.c for details. */
7656 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7661 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7663 return legitimize_tls_address (x, (enum tls_model) log, false);
7664 if (GET_CODE (x) == CONST
7665 && GET_CODE (XEXP (x, 0)) == PLUS
7666 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7667 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7669 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7670 (enum tls_model) log, false);
7671 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7674 if (flag_pic && SYMBOLIC_CONST (x))
7675 return legitimize_pic_address (x, 0);
7677 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7679 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7680 return legitimize_dllimport_symbol (x, true);
7681 if (GET_CODE (x) == CONST
7682 && GET_CODE (XEXP (x, 0)) == PLUS
7683 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7684 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7686 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7687 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7691 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7692 if (GET_CODE (x) == ASHIFT
7693 && CONST_INT_P (XEXP (x, 1))
7694 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7697 log = INTVAL (XEXP (x, 1));
7698 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7699 GEN_INT (1 << log));
7702 if (GET_CODE (x) == PLUS)
7704 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7706 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7707 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7708 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7711 log = INTVAL (XEXP (XEXP (x, 0), 1));
7712 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7713 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7714 GEN_INT (1 << log));
7717 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7718 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7719 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7722 log = INTVAL (XEXP (XEXP (x, 1), 1));
7723 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7724 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7725 GEN_INT (1 << log));
7728 /* Put multiply first if it isn't already. */
7729 if (GET_CODE (XEXP (x, 1)) == MULT)
7731 rtx tmp = XEXP (x, 0);
7732 XEXP (x, 0) = XEXP (x, 1);
7737 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7738 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7739 created by virtual register instantiation, register elimination, and
7740 similar optimizations. */
7741 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7744 x = gen_rtx_PLUS (Pmode,
7745 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7746 XEXP (XEXP (x, 1), 0)),
7747 XEXP (XEXP (x, 1), 1));
7751 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7752 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7753 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7754 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7755 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7756 && CONSTANT_P (XEXP (x, 1)))
7759 rtx other = NULL_RTX;
7761 if (CONST_INT_P (XEXP (x, 1)))
7763 constant = XEXP (x, 1);
7764 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7766 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7768 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7769 other = XEXP (x, 1);
7777 x = gen_rtx_PLUS (Pmode,
7778 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7779 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7780 plus_constant (other, INTVAL (constant)));
7784 if (changed && legitimate_address_p (mode, x, FALSE))
7787 if (GET_CODE (XEXP (x, 0)) == MULT)
7790 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7793 if (GET_CODE (XEXP (x, 1)) == MULT)
7796 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7800 && REG_P (XEXP (x, 1))
7801 && REG_P (XEXP (x, 0)))
7804 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7807 x = legitimize_pic_address (x, 0);
7810 if (changed && legitimate_address_p (mode, x, FALSE))
7813 if (REG_P (XEXP (x, 0)))
7815 rtx temp = gen_reg_rtx (Pmode);
7816 rtx val = force_operand (XEXP (x, 1), temp);
7818 emit_move_insn (temp, val);
7824 else if (REG_P (XEXP (x, 1)))
7826 rtx temp = gen_reg_rtx (Pmode);
7827 rtx val = force_operand (XEXP (x, 0), temp);
7829 emit_move_insn (temp, val);
7839 /* Print an integer constant expression in assembler syntax. Addition
7840 and subtraction are the only arithmetic that may appear in these
7841 expressions. FILE is the stdio stream to write to, X is the rtx, and
7842 CODE is the operand print code from the output string. */
7845 output_pic_addr_const (FILE *file, rtx x, int code)
7849 switch (GET_CODE (x))
7852 gcc_assert (flag_pic);
7857 if (! TARGET_MACHO || TARGET_64BIT)
7858 output_addr_const (file, x);
7861 const char *name = XSTR (x, 0);
7863 /* Mark the decl as referenced so that cgraph will
7864 output the function. */
7865 if (SYMBOL_REF_DECL (x))
7866 mark_decl_referenced (SYMBOL_REF_DECL (x));
7869 if (MACHOPIC_INDIRECT
7870 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7871 name = machopic_indirection_name (x, /*stub_p=*/true);
7873 assemble_name (file, name);
7875 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7876 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7877 fputs ("@PLT", file);
7884 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7885 assemble_name (asm_out_file, buf);
7889 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7893 /* This used to output parentheses around the expression,
7894 but that does not work on the 386 (either ATT or BSD assembler). */
7895 output_pic_addr_const (file, XEXP (x, 0), code);
7899 if (GET_MODE (x) == VOIDmode)
7901 /* We can use %d if the number is <32 bits and positive. */
7902 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7903 fprintf (file, "0x%lx%08lx",
7904 (unsigned long) CONST_DOUBLE_HIGH (x),
7905 (unsigned long) CONST_DOUBLE_LOW (x));
7907 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7910 /* We can't handle floating point constants;
7911 PRINT_OPERAND must handle them. */
7912 output_operand_lossage ("floating constant misused");
7916 /* Some assemblers need integer constants to appear first. */
7917 if (CONST_INT_P (XEXP (x, 0)))
7919 output_pic_addr_const (file, XEXP (x, 0), code);
7921 output_pic_addr_const (file, XEXP (x, 1), code);
7925 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7926 output_pic_addr_const (file, XEXP (x, 1), code);
7928 output_pic_addr_const (file, XEXP (x, 0), code);
7934 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7935 output_pic_addr_const (file, XEXP (x, 0), code);
7937 output_pic_addr_const (file, XEXP (x, 1), code);
7939 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7943 gcc_assert (XVECLEN (x, 0) == 1);
7944 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7945 switch (XINT (x, 1))
7948 fputs ("@GOT", file);
7951 fputs ("@GOTOFF", file);
7954 fputs ("@PLTOFF", file);
7956 case UNSPEC_GOTPCREL:
7957 fputs ("@GOTPCREL(%rip)", file);
7959 case UNSPEC_GOTTPOFF:
7960 /* FIXME: This might be @TPOFF in Sun ld too. */
7961 fputs ("@GOTTPOFF", file);
7964 fputs ("@TPOFF", file);
7968 fputs ("@TPOFF", file);
7970 fputs ("@NTPOFF", file);
7973 fputs ("@DTPOFF", file);
7975 case UNSPEC_GOTNTPOFF:
7977 fputs ("@GOTTPOFF(%rip)", file);
7979 fputs ("@GOTNTPOFF", file);
7981 case UNSPEC_INDNTPOFF:
7982 fputs ("@INDNTPOFF", file);
7985 output_operand_lossage ("invalid UNSPEC as operand");
7991 output_operand_lossage ("invalid expression as operand");
7995 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7996 We need to emit DTP-relative relocations. */
7998 static void ATTRIBUTE_UNUSED
7999 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8001 fputs (ASM_LONG, file);
8002 output_addr_const (file, x);
8003 fputs ("@DTPOFF", file);
8009 fputs (", 0", file);
8016 /* In the name of slightly smaller debug output, and to cater to
8017 general assembler lossage, recognize PIC+GOTOFF and turn it back
8018 into a direct symbol reference.
8020 On Darwin, this is necessary to avoid a crash, because Darwin
8021 has a different PIC label for each routine but the DWARF debugging
8022 information is not associated with any particular routine, so it's
8023 necessary to remove references to the PIC label from RTL stored by
8024 the DWARF output code. */
8027 ix86_delegitimize_address (rtx orig_x)
8030 /* reg_addend is NULL or a multiple of some register. */
8031 rtx reg_addend = NULL_RTX;
8032 /* const_addend is NULL or a const_int. */
8033 rtx const_addend = NULL_RTX;
8034 /* This is the result, or NULL. */
8035 rtx result = NULL_RTX;
8042 if (GET_CODE (x) != CONST
8043 || GET_CODE (XEXP (x, 0)) != UNSPEC
8044 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8047 return XVECEXP (XEXP (x, 0), 0, 0);
8050 if (GET_CODE (x) != PLUS
8051 || GET_CODE (XEXP (x, 1)) != CONST)
8054 if (REG_P (XEXP (x, 0))
8055 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8056 /* %ebx + GOT/GOTOFF */
8058 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8060 /* %ebx + %reg * scale + GOT/GOTOFF */
8061 reg_addend = XEXP (x, 0);
8062 if (REG_P (XEXP (reg_addend, 0))
8063 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8064 reg_addend = XEXP (reg_addend, 1);
8065 else if (REG_P (XEXP (reg_addend, 1))
8066 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8067 reg_addend = XEXP (reg_addend, 0);
8070 if (!REG_P (reg_addend)
8071 && GET_CODE (reg_addend) != MULT
8072 && GET_CODE (reg_addend) != ASHIFT)
8078 x = XEXP (XEXP (x, 1), 0);
8079 if (GET_CODE (x) == PLUS
8080 && CONST_INT_P (XEXP (x, 1)))
8082 const_addend = XEXP (x, 1);
8086 if (GET_CODE (x) == UNSPEC
8087 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8088 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8089 result = XVECEXP (x, 0, 0);
8091 if (TARGET_MACHO && darwin_local_data_pic (x)
8093 result = XEXP (x, 0);
8099 result = gen_rtx_PLUS (Pmode, result, const_addend);
8101 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8105 /* If X is a machine specific address (i.e. a symbol or label being
8106 referenced as a displacement from the GOT implemented using an
8107 UNSPEC), then return the base term. Otherwise return X. */
8110 ix86_find_base_term (rtx x)
8116 if (GET_CODE (x) != CONST)
8119 if (GET_CODE (term) == PLUS
8120 && (CONST_INT_P (XEXP (term, 1))
8121 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8122 term = XEXP (term, 0);
8123 if (GET_CODE (term) != UNSPEC
8124 || XINT (term, 1) != UNSPEC_GOTPCREL)
8127 term = XVECEXP (term, 0, 0);
8129 if (GET_CODE (term) != SYMBOL_REF
8130 && GET_CODE (term) != LABEL_REF)
8136 term = ix86_delegitimize_address (x);
8138 if (GET_CODE (term) != SYMBOL_REF
8139 && GET_CODE (term) != LABEL_REF)
8146 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8151 if (mode == CCFPmode || mode == CCFPUmode)
8153 enum rtx_code second_code, bypass_code;
8154 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8155 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8156 code = ix86_fp_compare_code_to_integer (code);
8160 code = reverse_condition (code);
8211 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8215 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8216 Those same assemblers have the same but opposite lossage on cmov. */
8217 gcc_assert (mode == CCmode);
8218 suffix = fp ? "nbe" : "a";
8238 gcc_assert (mode == CCmode);
8260 gcc_assert (mode == CCmode);
8261 suffix = fp ? "nb" : "ae";
8264 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8268 gcc_assert (mode == CCmode);
8272 suffix = fp ? "u" : "p";
8275 suffix = fp ? "nu" : "np";
8280 fputs (suffix, file);
8283 /* Print the name of register X to FILE based on its machine mode and number.
8284 If CODE is 'w', pretend the mode is HImode.
8285 If CODE is 'b', pretend the mode is QImode.
8286 If CODE is 'k', pretend the mode is SImode.
8287 If CODE is 'q', pretend the mode is DImode.
8288 If CODE is 'h', pretend the reg is the 'high' byte register.
8289 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8292 print_reg (rtx x, int code, FILE *file)
8294 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8295 && REGNO (x) != FRAME_POINTER_REGNUM
8296 && REGNO (x) != FLAGS_REG
8297 && REGNO (x) != FPSR_REG
8298 && REGNO (x) != FPCR_REG);
8300 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8303 if (code == 'w' || MMX_REG_P (x))
8305 else if (code == 'b')
8307 else if (code == 'k')
8309 else if (code == 'q')
8311 else if (code == 'y')
8313 else if (code == 'h')
8316 code = GET_MODE_SIZE (GET_MODE (x));
8318 /* Irritatingly, AMD extended registers use different naming convention
8319 from the normal registers. */
8320 if (REX_INT_REG_P (x))
8322 gcc_assert (TARGET_64BIT);
8326 error ("extended registers have no high halves");
8329 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8332 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8335 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8338 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8341 error ("unsupported operand size for extended register");
8349 if (STACK_TOP_P (x))
8351 fputs ("st(0)", file);
8358 if (! ANY_FP_REG_P (x))
8359 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8364 fputs (hi_reg_name[REGNO (x)], file);
8367 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8369 fputs (qi_reg_name[REGNO (x)], file);
8372 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8374 fputs (qi_high_reg_name[REGNO (x)], file);
8381 /* Locate some local-dynamic symbol still in use by this function
8382 so that we can print its name in some tls_local_dynamic_base
8386 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8390 if (GET_CODE (x) == SYMBOL_REF
8391 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8393 cfun->machine->some_ld_name = XSTR (x, 0);
8401 get_some_local_dynamic_name (void)
8405 if (cfun->machine->some_ld_name)
8406 return cfun->machine->some_ld_name;
8408 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8410 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8411 return cfun->machine->some_ld_name;
8417 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8418 C -- print opcode suffix for set/cmov insn.
8419 c -- like C, but print reversed condition
8420 F,f -- likewise, but for floating-point.
8421 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8423 R -- print the prefix for register names.
8424 z -- print the opcode suffix for the size of the current operand.
8425 * -- print a star (in certain assembler syntax)
8426 A -- print an absolute memory reference.
8427 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8428 s -- print a shift double count, followed by the assemblers argument
8430 b -- print the QImode name of the register for the indicated operand.
8431 %b0 would print %al if operands[0] is reg 0.
8432 w -- likewise, print the HImode name of the register.
8433 k -- likewise, print the SImode name of the register.
8434 q -- likewise, print the DImode name of the register.
8435 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8436 y -- print "st(0)" instead of "st" as a register.
8437 D -- print condition for SSE cmp instruction.
8438 P -- if PIC, print an @PLT suffix.
8439 X -- don't print any sort of PIC '@' suffix for a symbol.
8440 & -- print some in-use local-dynamic symbol name.
8441 H -- print a memory address offset by 8; used for sse high-parts
8445 print_operand (FILE *file, rtx x, int code)
8452 if (ASSEMBLER_DIALECT == ASM_ATT)
8457 assemble_name (file, get_some_local_dynamic_name ());
8461 switch (ASSEMBLER_DIALECT)
8468 /* Intel syntax. For absolute addresses, registers should not
8469 be surrounded by braces. */
8473 PRINT_OPERAND (file, x, 0);
8483 PRINT_OPERAND (file, x, 0);
8488 if (ASSEMBLER_DIALECT == ASM_ATT)
8493 if (ASSEMBLER_DIALECT == ASM_ATT)
8498 if (ASSEMBLER_DIALECT == ASM_ATT)
8503 if (ASSEMBLER_DIALECT == ASM_ATT)
8508 if (ASSEMBLER_DIALECT == ASM_ATT)
8513 if (ASSEMBLER_DIALECT == ASM_ATT)
8518 /* 387 opcodes don't get size suffixes if the operands are
8520 if (STACK_REG_P (x))
8523 /* Likewise if using Intel opcodes. */
8524 if (ASSEMBLER_DIALECT == ASM_INTEL)
8527 /* This is the size of op from size of operand. */
8528 switch (GET_MODE_SIZE (GET_MODE (x)))
8537 #ifdef HAVE_GAS_FILDS_FISTS
8547 if (GET_MODE (x) == SFmode)
8562 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8564 #ifdef GAS_MNEMONICS
8590 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8592 PRINT_OPERAND (file, x, 0);
8598 /* Little bit of braindamage here. The SSE compare instructions
8599 does use completely different names for the comparisons that the
8600 fp conditional moves. */
8601 switch (GET_CODE (x))
8616 fputs ("unord", file);
8620 fputs ("neq", file);
8624 fputs ("nlt", file);
8628 fputs ("nle", file);
8631 fputs ("ord", file);
8638 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8639 if (ASSEMBLER_DIALECT == ASM_ATT)
8641 switch (GET_MODE (x))
8643 case HImode: putc ('w', file); break;
8645 case SFmode: putc ('l', file); break;
8647 case DFmode: putc ('q', file); break;
8648 default: gcc_unreachable ();
8655 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8658 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8659 if (ASSEMBLER_DIALECT == ASM_ATT)
8662 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8665 /* Like above, but reverse condition */
8667 /* Check to see if argument to %c is really a constant
8668 and not a condition code which needs to be reversed. */
8669 if (!COMPARISON_P (x))
8671 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8674 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8677 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8678 if (ASSEMBLER_DIALECT == ASM_ATT)
8681 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8685 /* It doesn't actually matter what mode we use here, as we're
8686 only going to use this for printing. */
8687 x = adjust_address_nv (x, DImode, 8);
8694 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8697 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8700 int pred_val = INTVAL (XEXP (x, 0));
8702 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8703 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8705 int taken = pred_val > REG_BR_PROB_BASE / 2;
8706 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8708 /* Emit hints only in the case default branch prediction
8709 heuristics would fail. */
8710 if (taken != cputaken)
8712 /* We use 3e (DS) prefix for taken branches and
8713 2e (CS) prefix for not taken branches. */
8715 fputs ("ds ; ", file);
8717 fputs ("cs ; ", file);
8724 output_operand_lossage ("invalid operand code '%c'", code);
8729 print_reg (x, code, file);
8733 /* No `byte ptr' prefix for call instructions. */
8734 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8737 switch (GET_MODE_SIZE (GET_MODE (x)))
8739 case 1: size = "BYTE"; break;
8740 case 2: size = "WORD"; break;
8741 case 4: size = "DWORD"; break;
8742 case 8: size = "QWORD"; break;
8743 case 12: size = "XWORD"; break;
8744 case 16: size = "XMMWORD"; break;
8749 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8752 else if (code == 'w')
8754 else if (code == 'k')
8758 fputs (" PTR ", file);
8762 /* Avoid (%rip) for call operands. */
8763 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8764 && !CONST_INT_P (x))
8765 output_addr_const (file, x);
8766 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8767 output_operand_lossage ("invalid constraints for operand");
8772 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8777 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8778 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8780 if (ASSEMBLER_DIALECT == ASM_ATT)
8782 fprintf (file, "0x%08lx", l);
8785 /* These float cases don't actually occur as immediate operands. */
8786 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8790 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8791 fprintf (file, "%s", dstr);
8794 else if (GET_CODE (x) == CONST_DOUBLE
8795 && GET_MODE (x) == XFmode)
8799 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8800 fprintf (file, "%s", dstr);
8805 /* We have patterns that allow zero sets of memory, for instance.
8806 In 64-bit mode, we should probably support all 8-byte vectors,
8807 since we can in fact encode that into an immediate. */
8808 if (GET_CODE (x) == CONST_VECTOR)
8810 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8816 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8818 if (ASSEMBLER_DIALECT == ASM_ATT)
8821 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8822 || GET_CODE (x) == LABEL_REF)
8824 if (ASSEMBLER_DIALECT == ASM_ATT)
8827 fputs ("OFFSET FLAT:", file);
8830 if (CONST_INT_P (x))
8831 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8833 output_pic_addr_const (file, x, code);
8835 output_addr_const (file, x);
8839 /* Print a memory operand whose address is ADDR. */
8842 print_operand_address (FILE *file, rtx addr)
8844 struct ix86_address parts;
8845 rtx base, index, disp;
8847 int ok = ix86_decompose_address (addr, &parts);
8852 index = parts.index;
8854 scale = parts.scale;
8862 if (USER_LABEL_PREFIX[0] == 0)
8864 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8870 if (!base && !index)
8872 /* Displacement only requires special attention. */
8874 if (CONST_INT_P (disp))
8876 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8878 if (USER_LABEL_PREFIX[0] == 0)
8880 fputs ("ds:", file);
8882 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8885 output_pic_addr_const (file, disp, 0);
8887 output_addr_const (file, disp);
8889 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8892 if (GET_CODE (disp) == CONST
8893 && GET_CODE (XEXP (disp, 0)) == PLUS
8894 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8895 disp = XEXP (XEXP (disp, 0), 0);
8896 if (GET_CODE (disp) == LABEL_REF
8897 || (GET_CODE (disp) == SYMBOL_REF
8898 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8899 fputs ("(%rip)", file);
8904 if (ASSEMBLER_DIALECT == ASM_ATT)
8909 output_pic_addr_const (file, disp, 0);
8910 else if (GET_CODE (disp) == LABEL_REF)
8911 output_asm_label (disp);
8913 output_addr_const (file, disp);
8918 print_reg (base, 0, file);
8922 print_reg (index, 0, file);
8924 fprintf (file, ",%d", scale);
8930 rtx offset = NULL_RTX;
8934 /* Pull out the offset of a symbol; print any symbol itself. */
8935 if (GET_CODE (disp) == CONST
8936 && GET_CODE (XEXP (disp, 0)) == PLUS
8937 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8939 offset = XEXP (XEXP (disp, 0), 1);
8940 disp = gen_rtx_CONST (VOIDmode,
8941 XEXP (XEXP (disp, 0), 0));
8945 output_pic_addr_const (file, disp, 0);
8946 else if (GET_CODE (disp) == LABEL_REF)
8947 output_asm_label (disp);
8948 else if (CONST_INT_P (disp))
8951 output_addr_const (file, disp);
8957 print_reg (base, 0, file);
8960 if (INTVAL (offset) >= 0)
8962 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8966 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8973 print_reg (index, 0, file);
8975 fprintf (file, "*%d", scale);
8983 output_addr_const_extra (FILE *file, rtx x)
8987 if (GET_CODE (x) != UNSPEC)
8990 op = XVECEXP (x, 0, 0);
8991 switch (XINT (x, 1))
8993 case UNSPEC_GOTTPOFF:
8994 output_addr_const (file, op);
8995 /* FIXME: This might be @TPOFF in Sun ld. */
8996 fputs ("@GOTTPOFF", file);
8999 output_addr_const (file, op);
9000 fputs ("@TPOFF", file);
9003 output_addr_const (file, op);
9005 fputs ("@TPOFF", file);
9007 fputs ("@NTPOFF", file);
9010 output_addr_const (file, op);
9011 fputs ("@DTPOFF", file);
9013 case UNSPEC_GOTNTPOFF:
9014 output_addr_const (file, op);
9016 fputs ("@GOTTPOFF(%rip)", file);
9018 fputs ("@GOTNTPOFF", file);
9020 case UNSPEC_INDNTPOFF:
9021 output_addr_const (file, op);
9022 fputs ("@INDNTPOFF", file);
9032 /* Split one or more DImode RTL references into pairs of SImode
9033 references. The RTL can be REG, offsettable MEM, integer constant, or
9034 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9035 split and "num" is its length. lo_half and hi_half are output arrays
9036 that parallel "operands". */
9039 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9043 rtx op = operands[num];
9045 /* simplify_subreg refuse to split volatile memory addresses,
9046 but we still have to handle it. */
9049 lo_half[num] = adjust_address (op, SImode, 0);
9050 hi_half[num] = adjust_address (op, SImode, 4);
9054 lo_half[num] = simplify_gen_subreg (SImode, op,
9055 GET_MODE (op) == VOIDmode
9056 ? DImode : GET_MODE (op), 0);
9057 hi_half[num] = simplify_gen_subreg (SImode, op,
9058 GET_MODE (op) == VOIDmode
9059 ? DImode : GET_MODE (op), 4);
9063 /* Split one or more TImode RTL references into pairs of DImode
9064 references. The RTL can be REG, offsettable MEM, integer constant, or
9065 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9066 split and "num" is its length. lo_half and hi_half are output arrays
9067 that parallel "operands". */
9070 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9074 rtx op = operands[num];
9076 /* simplify_subreg refuse to split volatile memory addresses, but we
9077 still have to handle it. */
9080 lo_half[num] = adjust_address (op, DImode, 0);
9081 hi_half[num] = adjust_address (op, DImode, 8);
9085 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9086 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9091 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9092 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9093 is the expression of the binary operation. The output may either be
9094 emitted here, or returned to the caller, like all output_* functions.
9096 There is no guarantee that the operands are the same mode, as they
9097 might be within FLOAT or FLOAT_EXTEND expressions. */
9099 #ifndef SYSV386_COMPAT
9100 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9101 wants to fix the assemblers because that causes incompatibility
9102 with gcc. No-one wants to fix gcc because that causes
9103 incompatibility with assemblers... You can use the option of
9104 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9105 #define SYSV386_COMPAT 1
9109 output_387_binary_op (rtx insn, rtx *operands)
9111 static char buf[30];
9114 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9116 #ifdef ENABLE_CHECKING
9117 /* Even if we do not want to check the inputs, this documents input
9118 constraints. Which helps in understanding the following code. */
9119 if (STACK_REG_P (operands[0])
9120 && ((REG_P (operands[1])
9121 && REGNO (operands[0]) == REGNO (operands[1])
9122 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9123 || (REG_P (operands[2])
9124 && REGNO (operands[0]) == REGNO (operands[2])
9125 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9126 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9129 gcc_assert (is_sse);
9132 switch (GET_CODE (operands[3]))
9135 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9136 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9144 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9145 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9153 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9154 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9162 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9163 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9177 if (GET_MODE (operands[0]) == SFmode)
9178 strcat (buf, "ss\t{%2, %0|%0, %2}");
9180 strcat (buf, "sd\t{%2, %0|%0, %2}");
9185 switch (GET_CODE (operands[3]))
9189 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9191 rtx temp = operands[2];
9192 operands[2] = operands[1];
9196 /* know operands[0] == operands[1]. */
9198 if (MEM_P (operands[2]))
9204 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9206 if (STACK_TOP_P (operands[0]))
9207 /* How is it that we are storing to a dead operand[2]?
9208 Well, presumably operands[1] is dead too. We can't
9209 store the result to st(0) as st(0) gets popped on this
9210 instruction. Instead store to operands[2] (which I
9211 think has to be st(1)). st(1) will be popped later.
9212 gcc <= 2.8.1 didn't have this check and generated
9213 assembly code that the Unixware assembler rejected. */
9214 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9216 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9220 if (STACK_TOP_P (operands[0]))
9221 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9223 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9228 if (MEM_P (operands[1]))
9234 if (MEM_P (operands[2]))
9240 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9243 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9244 derived assemblers, confusingly reverse the direction of
9245 the operation for fsub{r} and fdiv{r} when the
9246 destination register is not st(0). The Intel assembler
9247 doesn't have this brain damage. Read !SYSV386_COMPAT to
9248 figure out what the hardware really does. */
9249 if (STACK_TOP_P (operands[0]))
9250 p = "{p\t%0, %2|rp\t%2, %0}";
9252 p = "{rp\t%2, %0|p\t%0, %2}";
9254 if (STACK_TOP_P (operands[0]))
9255 /* As above for fmul/fadd, we can't store to st(0). */
9256 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9258 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9263 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9266 if (STACK_TOP_P (operands[0]))
9267 p = "{rp\t%0, %1|p\t%1, %0}";
9269 p = "{p\t%1, %0|rp\t%0, %1}";
9271 if (STACK_TOP_P (operands[0]))
9272 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9274 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9279 if (STACK_TOP_P (operands[0]))
9281 if (STACK_TOP_P (operands[1]))
9282 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9284 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9287 else if (STACK_TOP_P (operands[1]))
9290 p = "{\t%1, %0|r\t%0, %1}";
9292 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9298 p = "{r\t%2, %0|\t%0, %2}";
9300 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9313 /* Return needed mode for entity in optimize_mode_switching pass. */
9316 ix86_mode_needed (int entity, rtx insn)
9318 enum attr_i387_cw mode;
9320 /* The mode UNINITIALIZED is used to store control word after a
9321 function call or ASM pattern. The mode ANY specify that function
9322 has no requirements on the control word and make no changes in the
9323 bits we are interested in. */
9326 || (NONJUMP_INSN_P (insn)
9327 && (asm_noperands (PATTERN (insn)) >= 0
9328 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9329 return I387_CW_UNINITIALIZED;
9331 if (recog_memoized (insn) < 0)
9334 mode = get_attr_i387_cw (insn);
9339 if (mode == I387_CW_TRUNC)
9344 if (mode == I387_CW_FLOOR)
9349 if (mode == I387_CW_CEIL)
9354 if (mode == I387_CW_MASK_PM)
9365 /* Output code to initialize control word copies used by trunc?f?i and
9366 rounding patterns. CURRENT_MODE is set to current control word,
9367 while NEW_MODE is set to new control word. */
9370 emit_i387_cw_initialization (int mode)
9372 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9375 enum ix86_stack_slot slot;
9377 rtx reg = gen_reg_rtx (HImode);
9379 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9380 emit_move_insn (reg, copy_rtx (stored_mode));
9382 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9387 /* round toward zero (truncate) */
9388 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9389 slot = SLOT_CW_TRUNC;
9393 /* round down toward -oo */
9394 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9395 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9396 slot = SLOT_CW_FLOOR;
9400 /* round up toward +oo */
9401 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9402 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9403 slot = SLOT_CW_CEIL;
9406 case I387_CW_MASK_PM:
9407 /* mask precision exception for nearbyint() */
9408 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9409 slot = SLOT_CW_MASK_PM;
9421 /* round toward zero (truncate) */
9422 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9423 slot = SLOT_CW_TRUNC;
9427 /* round down toward -oo */
9428 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9429 slot = SLOT_CW_FLOOR;
9433 /* round up toward +oo */
9434 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9435 slot = SLOT_CW_CEIL;
9438 case I387_CW_MASK_PM:
9439 /* mask precision exception for nearbyint() */
9440 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9441 slot = SLOT_CW_MASK_PM;
9449 gcc_assert (slot < MAX_386_STACK_LOCALS);
9451 new_mode = assign_386_stack_local (HImode, slot);
9452 emit_move_insn (new_mode, reg);
9455 /* Output code for INSN to convert a float to a signed int. OPERANDS
9456 are the insn operands. The output may be [HSD]Imode and the input
9457 operand may be [SDX]Fmode. */
9460 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9462 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9463 int dimode_p = GET_MODE (operands[0]) == DImode;
9464 int round_mode = get_attr_i387_cw (insn);
9466 /* Jump through a hoop or two for DImode, since the hardware has no
9467 non-popping instruction. We used to do this a different way, but
9468 that was somewhat fragile and broke with post-reload splitters. */
9469 if ((dimode_p || fisttp) && !stack_top_dies)
9470 output_asm_insn ("fld\t%y1", operands);
9472 gcc_assert (STACK_TOP_P (operands[1]));
9473 gcc_assert (MEM_P (operands[0]));
9474 gcc_assert (GET_MODE (operands[1]) != TFmode);
9477 output_asm_insn ("fisttp%z0\t%0", operands);
9480 if (round_mode != I387_CW_ANY)
9481 output_asm_insn ("fldcw\t%3", operands);
9482 if (stack_top_dies || dimode_p)
9483 output_asm_insn ("fistp%z0\t%0", operands);
9485 output_asm_insn ("fist%z0\t%0", operands);
9486 if (round_mode != I387_CW_ANY)
9487 output_asm_insn ("fldcw\t%2", operands);
9493 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9494 have the values zero or one, indicates the ffreep insn's operand
9495 from the OPERANDS array. */
9498 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9500 if (TARGET_USE_FFREEP)
9501 #if HAVE_AS_IX86_FFREEP
9502 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9505 static char retval[] = ".word\t0xc_df";
9506 int regno = REGNO (operands[opno]);
9508 gcc_assert (FP_REGNO_P (regno));
9510 retval[9] = '0' + (regno - FIRST_STACK_REG);
9515 return opno ? "fstp\t%y1" : "fstp\t%y0";
9519 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9520 should be used. UNORDERED_P is true when fucom should be used. */
9523 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9526 rtx cmp_op0, cmp_op1;
9527 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9531 cmp_op0 = operands[0];
9532 cmp_op1 = operands[1];
9536 cmp_op0 = operands[1];
9537 cmp_op1 = operands[2];
9542 if (GET_MODE (operands[0]) == SFmode)
9544 return "ucomiss\t{%1, %0|%0, %1}";
9546 return "comiss\t{%1, %0|%0, %1}";
9549 return "ucomisd\t{%1, %0|%0, %1}";
9551 return "comisd\t{%1, %0|%0, %1}";
9554 gcc_assert (STACK_TOP_P (cmp_op0));
9556 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9558 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9562 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9563 return output_387_ffreep (operands, 1);
9566 return "ftst\n\tfnstsw\t%0";
9569 if (STACK_REG_P (cmp_op1)
9571 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9572 && REGNO (cmp_op1) != FIRST_STACK_REG)
9574 /* If both the top of the 387 stack dies, and the other operand
9575 is also a stack register that dies, then this must be a
9576 `fcompp' float compare */
9580 /* There is no double popping fcomi variant. Fortunately,
9581 eflags is immune from the fstp's cc clobbering. */
9583 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9585 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9586 return output_387_ffreep (operands, 0);
9591 return "fucompp\n\tfnstsw\t%0";
9593 return "fcompp\n\tfnstsw\t%0";
9598 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9600 static const char * const alt[16] =
9602 "fcom%z2\t%y2\n\tfnstsw\t%0",
9603 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9604 "fucom%z2\t%y2\n\tfnstsw\t%0",
9605 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9607 "ficom%z2\t%y2\n\tfnstsw\t%0",
9608 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9612 "fcomi\t{%y1, %0|%0, %y1}",
9613 "fcomip\t{%y1, %0|%0, %y1}",
9614 "fucomi\t{%y1, %0|%0, %y1}",
9615 "fucomip\t{%y1, %0|%0, %y1}",
9626 mask = eflags_p << 3;
9627 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9628 mask |= unordered_p << 1;
9629 mask |= stack_top_dies;
9631 gcc_assert (mask < 16);
9640 ix86_output_addr_vec_elt (FILE *file, int value)
9642 const char *directive = ASM_LONG;
9646 directive = ASM_QUAD;
9648 gcc_assert (!TARGET_64BIT);
9651 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9655 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9657 const char *directive = ASM_LONG;
9660 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9661 directive = ASM_QUAD;
9663 gcc_assert (!TARGET_64BIT);
9665 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9666 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9667 fprintf (file, "%s%s%d-%s%d\n",
9668 directive, LPREFIX, value, LPREFIX, rel);
9669 else if (HAVE_AS_GOTOFF_IN_DATA)
9670 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9672 else if (TARGET_MACHO)
9674 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9675 machopic_output_function_base_name (file);
9676 fprintf(file, "\n");
9680 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9681 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9684 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9688 ix86_expand_clear (rtx dest)
9692 /* We play register width games, which are only valid after reload. */
9693 gcc_assert (reload_completed);
9695 /* Avoid HImode and its attendant prefix byte. */
9696 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9697 dest = gen_rtx_REG (SImode, REGNO (dest));
9698 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9700 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9701 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9703 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9704 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9710 /* X is an unchanging MEM. If it is a constant pool reference, return
9711 the constant pool rtx, else NULL. */
9714 maybe_get_pool_constant (rtx x)
9716 x = ix86_delegitimize_address (XEXP (x, 0));
9718 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9719 return get_pool_constant (x);
9725 ix86_expand_move (enum machine_mode mode, rtx operands[])
9727 int strict = (reload_in_progress || reload_completed);
9729 enum tls_model model;
9734 if (GET_CODE (op1) == SYMBOL_REF)
9736 model = SYMBOL_REF_TLS_MODEL (op1);
9739 op1 = legitimize_tls_address (op1, model, true);
9740 op1 = force_operand (op1, op0);
9744 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9745 && SYMBOL_REF_DLLIMPORT_P (op1))
9746 op1 = legitimize_dllimport_symbol (op1, false);
9748 else if (GET_CODE (op1) == CONST
9749 && GET_CODE (XEXP (op1, 0)) == PLUS
9750 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9752 rtx addend = XEXP (XEXP (op1, 0), 1);
9753 rtx symbol = XEXP (XEXP (op1, 0), 0);
9756 model = SYMBOL_REF_TLS_MODEL (symbol);
9758 tmp = legitimize_tls_address (symbol, model, true);
9759 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9760 && SYMBOL_REF_DLLIMPORT_P (symbol))
9761 tmp = legitimize_dllimport_symbol (symbol, true);
9765 tmp = force_operand (tmp, NULL);
9766 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9767 op0, 1, OPTAB_DIRECT);
9773 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9775 if (TARGET_MACHO && !TARGET_64BIT)
9780 rtx temp = ((reload_in_progress
9781 || ((op0 && REG_P (op0))
9783 ? op0 : gen_reg_rtx (Pmode));
9784 op1 = machopic_indirect_data_reference (op1, temp);
9785 op1 = machopic_legitimize_pic_address (op1, mode,
9786 temp == op1 ? 0 : temp);
9788 else if (MACHOPIC_INDIRECT)
9789 op1 = machopic_indirect_data_reference (op1, 0);
9797 op1 = force_reg (Pmode, op1);
9798 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9800 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9801 op1 = legitimize_pic_address (op1, reg);
9810 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9811 || !push_operand (op0, mode))
9813 op1 = force_reg (mode, op1);
9815 if (push_operand (op0, mode)
9816 && ! general_no_elim_operand (op1, mode))
9817 op1 = copy_to_mode_reg (mode, op1);
9819 /* Force large constants in 64bit compilation into register
9820 to get them CSEed. */
9821 if (TARGET_64BIT && mode == DImode
9822 && immediate_operand (op1, mode)
9823 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9824 && !register_operand (op0, mode)
9825 && optimize && !reload_completed && !reload_in_progress)
9826 op1 = copy_to_mode_reg (mode, op1);
9828 if (FLOAT_MODE_P (mode))
9830 /* If we are loading a floating point constant to a register,
9831 force the value to memory now, since we'll get better code
9832 out the back end. */
9836 else if (GET_CODE (op1) == CONST_DOUBLE)
9838 op1 = validize_mem (force_const_mem (mode, op1));
9839 if (!register_operand (op0, mode))
9841 rtx temp = gen_reg_rtx (mode);
9842 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9843 emit_move_insn (op0, temp);
9850 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9854 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9856 rtx op0 = operands[0], op1 = operands[1];
9857 unsigned int align = GET_MODE_ALIGNMENT (mode);
9859 /* Force constants other than zero into memory. We do not know how
9860 the instructions used to build constants modify the upper 64 bits
9861 of the register, once we have that information we may be able
9862 to handle some of them more efficiently. */
9863 if ((reload_in_progress | reload_completed) == 0
9864 && register_operand (op0, mode)
9865 && (CONSTANT_P (op1)
9866 || (GET_CODE (op1) == SUBREG
9867 && CONSTANT_P (SUBREG_REG (op1))))
9868 && standard_sse_constant_p (op1) <= 0)
9869 op1 = validize_mem (force_const_mem (mode, op1));
9871 /* TDmode values are passed as TImode on the stack. Timode values
9872 are moved via xmm registers, and moving them to stack can result in
9873 unaligned memory access. Use ix86_expand_vector_move_misalign()
9874 if memory operand is not aligned correctly. */
9876 && (mode == TImode) && !TARGET_64BIT
9877 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9878 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9882 /* ix86_expand_vector_move_misalign() does not like constants ... */
9883 if (CONSTANT_P (op1)
9884 || (GET_CODE (op1) == SUBREG
9885 && CONSTANT_P (SUBREG_REG (op1))))
9886 op1 = validize_mem (force_const_mem (mode, op1));
9888 /* ... nor both arguments in memory. */
9889 if (!register_operand (op0, mode)
9890 && !register_operand (op1, mode))
9891 op1 = force_reg (mode, op1);
9893 tmp[0] = op0; tmp[1] = op1;
9894 ix86_expand_vector_move_misalign (mode, tmp);
9898 /* Make operand1 a register if it isn't already. */
9900 && !register_operand (op0, mode)
9901 && !register_operand (op1, mode))
9903 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9907 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9910 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9911 straight to ix86_expand_vector_move. */
9912 /* Code generation for scalar reg-reg moves of single and double precision data:
9913 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9917 if (x86_sse_partial_reg_dependency == true)
9922 Code generation for scalar loads of double precision data:
9923 if (x86_sse_split_regs == true)
9924 movlpd mem, reg (gas syntax)
9928 Code generation for unaligned packed loads of single precision data
9929 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9930 if (x86_sse_unaligned_move_optimal)
9933 if (x86_sse_partial_reg_dependency == true)
9945 Code generation for unaligned packed loads of double precision data
9946 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9947 if (x86_sse_unaligned_move_optimal)
9950 if (x86_sse_split_regs == true)
9963 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9972 /* If we're optimizing for size, movups is the smallest. */
9975 op0 = gen_lowpart (V4SFmode, op0);
9976 op1 = gen_lowpart (V4SFmode, op1);
9977 emit_insn (gen_sse_movups (op0, op1));
9981 /* ??? If we have typed data, then it would appear that using
9982 movdqu is the only way to get unaligned data loaded with
9984 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9986 op0 = gen_lowpart (V16QImode, op0);
9987 op1 = gen_lowpart (V16QImode, op1);
9988 emit_insn (gen_sse2_movdqu (op0, op1));
9992 if (TARGET_SSE2 && mode == V2DFmode)
9996 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9998 op0 = gen_lowpart (V2DFmode, op0);
9999 op1 = gen_lowpart (V2DFmode, op1);
10000 emit_insn (gen_sse2_movupd (op0, op1));
10004 /* When SSE registers are split into halves, we can avoid
10005 writing to the top half twice. */
10006 if (TARGET_SSE_SPLIT_REGS)
10008 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10013 /* ??? Not sure about the best option for the Intel chips.
10014 The following would seem to satisfy; the register is
10015 entirely cleared, breaking the dependency chain. We
10016 then store to the upper half, with a dependency depth
10017 of one. A rumor has it that Intel recommends two movsd
10018 followed by an unpacklpd, but this is unconfirmed. And
10019 given that the dependency depth of the unpacklpd would
10020 still be one, I'm not sure why this would be better. */
10021 zero = CONST0_RTX (V2DFmode);
10024 m = adjust_address (op1, DFmode, 0);
10025 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10026 m = adjust_address (op1, DFmode, 8);
10027 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10031 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10033 op0 = gen_lowpart (V4SFmode, op0);
10034 op1 = gen_lowpart (V4SFmode, op1);
10035 emit_insn (gen_sse_movups (op0, op1));
10039 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10040 emit_move_insn (op0, CONST0_RTX (mode));
10042 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10044 if (mode != V4SFmode)
10045 op0 = gen_lowpart (V4SFmode, op0);
10046 m = adjust_address (op1, V2SFmode, 0);
10047 emit_insn (gen_sse_loadlps (op0, op0, m));
10048 m = adjust_address (op1, V2SFmode, 8);
10049 emit_insn (gen_sse_loadhps (op0, op0, m));
10052 else if (MEM_P (op0))
10054 /* If we're optimizing for size, movups is the smallest. */
10057 op0 = gen_lowpart (V4SFmode, op0);
10058 op1 = gen_lowpart (V4SFmode, op1);
10059 emit_insn (gen_sse_movups (op0, op1));
10063 /* ??? Similar to above, only less clear because of quote
10064 typeless stores unquote. */
10065 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10066 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10068 op0 = gen_lowpart (V16QImode, op0);
10069 op1 = gen_lowpart (V16QImode, op1);
10070 emit_insn (gen_sse2_movdqu (op0, op1));
10074 if (TARGET_SSE2 && mode == V2DFmode)
10076 m = adjust_address (op0, DFmode, 0);
10077 emit_insn (gen_sse2_storelpd (m, op1));
10078 m = adjust_address (op0, DFmode, 8);
10079 emit_insn (gen_sse2_storehpd (m, op1));
10083 if (mode != V4SFmode)
10084 op1 = gen_lowpart (V4SFmode, op1);
10085 m = adjust_address (op0, V2SFmode, 0);
10086 emit_insn (gen_sse_storelps (m, op1));
10087 m = adjust_address (op0, V2SFmode, 8);
10088 emit_insn (gen_sse_storehps (m, op1));
10092 gcc_unreachable ();
10095 /* Expand a push in MODE. This is some mode for which we do not support
10096 proper push instructions, at least from the registers that we expect
10097 the value to live in. */
10100 ix86_expand_push (enum machine_mode mode, rtx x)
10104 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10105 GEN_INT (-GET_MODE_SIZE (mode)),
10106 stack_pointer_rtx, 1, OPTAB_DIRECT);
10107 if (tmp != stack_pointer_rtx)
10108 emit_move_insn (stack_pointer_rtx, tmp);
10110 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10111 emit_move_insn (tmp, x);
10114 /* Helper function of ix86_fixup_binary_operands to canonicalize
10115 operand order. Returns true if the operands should be swapped. */
10118 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10121 rtx dst = operands[0];
10122 rtx src1 = operands[1];
10123 rtx src2 = operands[2];
10125 /* If the operation is not commutative, we can't do anything. */
10126 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10129 /* Highest priority is that src1 should match dst. */
10130 if (rtx_equal_p (dst, src1))
10132 if (rtx_equal_p (dst, src2))
10135 /* Next highest priority is that immediate constants come second. */
10136 if (immediate_operand (src2, mode))
10138 if (immediate_operand (src1, mode))
10141 /* Lowest priority is that memory references should come second. */
10151 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10152 destination to use for the operation. If different from the true
10153 destination in operands[0], a copy operation will be required. */
10156 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10159 rtx dst = operands[0];
10160 rtx src1 = operands[1];
10161 rtx src2 = operands[2];
10163 /* Canonicalize operand order. */
10164 if (ix86_swap_binary_operands_p (code, mode, operands))
10171 /* Both source operands cannot be in memory. */
10172 if (MEM_P (src1) && MEM_P (src2))
10174 /* Optimization: Only read from memory once. */
10175 if (rtx_equal_p (src1, src2))
10177 src2 = force_reg (mode, src2);
10181 src2 = force_reg (mode, src2);
10184 /* If the destination is memory, and we do not have matching source
10185 operands, do things in registers. */
10186 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10187 dst = gen_reg_rtx (mode);
10189 /* Source 1 cannot be a constant. */
10190 if (CONSTANT_P (src1))
10191 src1 = force_reg (mode, src1);
10193 /* Source 1 cannot be a non-matching memory. */
10194 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10195 src1 = force_reg (mode, src1);
10197 operands[1] = src1;
10198 operands[2] = src2;
10202 /* Similarly, but assume that the destination has already been
10203 set up properly. */
10206 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10207 enum machine_mode mode, rtx operands[])
10209 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10210 gcc_assert (dst == operands[0]);
10213 /* Attempt to expand a binary operator. Make the expansion closer to the
10214 actual machine, then just general_operand, which will allow 3 separate
10215 memory references (one output, two input) in a single insn. */
10218 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10221 rtx src1, src2, dst, op, clob;
10223 dst = ix86_fixup_binary_operands (code, mode, operands);
10224 src1 = operands[1];
10225 src2 = operands[2];
10227 /* Emit the instruction. */
10229 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10230 if (reload_in_progress)
10232 /* Reload doesn't know about the flags register, and doesn't know that
10233 it doesn't want to clobber it. We can only do this with PLUS. */
10234 gcc_assert (code == PLUS);
10239 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10240 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10243 /* Fix up the destination if needed. */
10244 if (dst != operands[0])
10245 emit_move_insn (operands[0], dst);
10248 /* Return TRUE or FALSE depending on whether the binary operator meets the
10249 appropriate constraints. */
10252 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10255 rtx dst = operands[0];
10256 rtx src1 = operands[1];
10257 rtx src2 = operands[2];
10259 /* Both source operands cannot be in memory. */
10260 if (MEM_P (src1) && MEM_P (src2))
10263 /* Canonicalize operand order for commutative operators. */
10264 if (ix86_swap_binary_operands_p (code, mode, operands))
10271 /* If the destination is memory, we must have a matching source operand. */
10272 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10275 /* Source 1 cannot be a constant. */
10276 if (CONSTANT_P (src1))
10279 /* Source 1 cannot be a non-matching memory. */
10280 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10286 /* Attempt to expand a unary operator. Make the expansion closer to the
10287 actual machine, then just general_operand, which will allow 2 separate
10288 memory references (one output, one input) in a single insn. */
10291 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10294 int matching_memory;
10295 rtx src, dst, op, clob;
10300 /* If the destination is memory, and we do not have matching source
10301 operands, do things in registers. */
10302 matching_memory = 0;
10305 if (rtx_equal_p (dst, src))
10306 matching_memory = 1;
10308 dst = gen_reg_rtx (mode);
10311 /* When source operand is memory, destination must match. */
10312 if (MEM_P (src) && !matching_memory)
10313 src = force_reg (mode, src);
10315 /* Emit the instruction. */
10317 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10318 if (reload_in_progress || code == NOT)
10320 /* Reload doesn't know about the flags register, and doesn't know that
10321 it doesn't want to clobber it. */
10322 gcc_assert (code == NOT);
10327 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10328 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10331 /* Fix up the destination if needed. */
10332 if (dst != operands[0])
10333 emit_move_insn (operands[0], dst);
10336 /* Return TRUE or FALSE depending on whether the unary operator meets the
10337 appropriate constraints. */
10340 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10341 enum machine_mode mode ATTRIBUTE_UNUSED,
10342 rtx operands[2] ATTRIBUTE_UNUSED)
10344 /* If one of operands is memory, source and destination must match. */
10345 if ((MEM_P (operands[0])
10346 || MEM_P (operands[1]))
10347 && ! rtx_equal_p (operands[0], operands[1]))
10352 /* Post-reload splitter for converting an SF or DFmode value in an
10353 SSE register into an unsigned SImode. */
10356 ix86_split_convert_uns_si_sse (rtx operands[])
10358 enum machine_mode vecmode;
10359 rtx value, large, zero_or_two31, input, two31, x;
10361 large = operands[1];
10362 zero_or_two31 = operands[2];
10363 input = operands[3];
10364 two31 = operands[4];
10365 vecmode = GET_MODE (large);
10366 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10368 /* Load up the value into the low element. We must ensure that the other
10369 elements are valid floats -- zero is the easiest such value. */
10372 if (vecmode == V4SFmode)
10373 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10375 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10379 input = gen_rtx_REG (vecmode, REGNO (input));
10380 emit_move_insn (value, CONST0_RTX (vecmode));
10381 if (vecmode == V4SFmode)
10382 emit_insn (gen_sse_movss (value, value, input));
10384 emit_insn (gen_sse2_movsd (value, value, input));
10387 emit_move_insn (large, two31);
10388 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10390 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10391 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10393 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10394 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10396 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10397 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10399 large = gen_rtx_REG (V4SImode, REGNO (large));
10400 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10402 x = gen_rtx_REG (V4SImode, REGNO (value));
10403 if (vecmode == V4SFmode)
10404 emit_insn (gen_sse2_cvttps2dq (x, value));
10406 emit_insn (gen_sse2_cvttpd2dq (x, value));
10409 emit_insn (gen_xorv4si3 (value, value, large));
10412 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10413 Expects the 64-bit DImode to be supplied in a pair of integral
10414 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10415 -mfpmath=sse, !optimize_size only. */
10418 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10420 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10421 rtx int_xmm, fp_xmm;
10422 rtx biases, exponents;
10425 int_xmm = gen_reg_rtx (V4SImode);
10426 if (TARGET_INTER_UNIT_MOVES)
10427 emit_insn (gen_movdi_to_sse (int_xmm, input));
10428 else if (TARGET_SSE_SPLIT_REGS)
10430 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10431 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10435 x = gen_reg_rtx (V2DImode);
10436 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10437 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10440 x = gen_rtx_CONST_VECTOR (V4SImode,
10441 gen_rtvec (4, GEN_INT (0x43300000UL),
10442 GEN_INT (0x45300000UL),
10443 const0_rtx, const0_rtx));
10444 exponents = validize_mem (force_const_mem (V4SImode, x));
10446 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10447 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10449 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10450 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10451 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10452 (0x1.0p84 + double(fp_value_hi_xmm)).
10453 Note these exponents differ by 32. */
10455 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10457 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10458 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10459 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10460 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10461 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10462 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10463 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10464 biases = validize_mem (force_const_mem (V2DFmode, biases));
10465 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10467 /* Add the upper and lower DFmode values together. */
10469 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10472 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10473 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10474 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10477 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10480 /* Convert an unsigned SImode value into a DFmode. Only currently used
10481 for SSE, but applicable anywhere. */
10484 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10486 REAL_VALUE_TYPE TWO31r;
10489 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10490 NULL, 1, OPTAB_DIRECT);
10492 fp = gen_reg_rtx (DFmode);
10493 emit_insn (gen_floatsidf2 (fp, x));
10495 real_ldexp (&TWO31r, &dconst1, 31);
10496 x = const_double_from_real_value (TWO31r, DFmode);
10498 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10500 emit_move_insn (target, x);
10503 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10504 32-bit mode; otherwise we have a direct convert instruction. */
10507 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10509 REAL_VALUE_TYPE TWO32r;
10510 rtx fp_lo, fp_hi, x;
10512 fp_lo = gen_reg_rtx (DFmode);
10513 fp_hi = gen_reg_rtx (DFmode);
10515 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10517 real_ldexp (&TWO32r, &dconst1, 32);
10518 x = const_double_from_real_value (TWO32r, DFmode);
10519 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10521 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10523 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10526 emit_move_insn (target, x);
10529 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10530 For x86_32, -mfpmath=sse, !optimize_size only. */
10532 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10534 REAL_VALUE_TYPE ONE16r;
10535 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10537 real_ldexp (&ONE16r, &dconst1, 16);
10538 x = const_double_from_real_value (ONE16r, SFmode);
10539 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10540 NULL, 0, OPTAB_DIRECT);
10541 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10542 NULL, 0, OPTAB_DIRECT);
10543 fp_hi = gen_reg_rtx (SFmode);
10544 fp_lo = gen_reg_rtx (SFmode);
10545 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10546 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10547 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10549 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10551 if (!rtx_equal_p (target, fp_hi))
10552 emit_move_insn (target, fp_hi);
10555 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10556 then replicate the value for all elements of the vector
10560 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10567 v = gen_rtvec (4, value, value, value, value);
10568 return gen_rtx_CONST_VECTOR (V4SImode, v);
10572 v = gen_rtvec (2, value, value);
10573 return gen_rtx_CONST_VECTOR (V2DImode, v);
10577 v = gen_rtvec (4, value, value, value, value);
10579 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10580 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10581 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10585 v = gen_rtvec (2, value, value);
10587 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10588 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10591 gcc_unreachable ();
10595 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10596 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10597 for an SSE register. If VECT is true, then replicate the mask for
10598 all elements of the vector register. If INVERT is true, then create
10599 a mask excluding the sign bit. */
10602 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10604 enum machine_mode vec_mode, imode;
10605 HOST_WIDE_INT hi, lo;
10610 /* Find the sign bit, sign extended to 2*HWI. */
10616 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10617 lo = 0x80000000, hi = lo < 0;
10623 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10624 if (HOST_BITS_PER_WIDE_INT >= 64)
10625 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10627 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10633 vec_mode = VOIDmode;
10634 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10635 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10639 gcc_unreachable ();
10643 lo = ~lo, hi = ~hi;
10645 /* Force this value into the low part of a fp vector constant. */
10646 mask = immed_double_const (lo, hi, imode);
10647 mask = gen_lowpart (mode, mask);
10649 if (vec_mode == VOIDmode)
10650 return force_reg (mode, mask);
10652 v = ix86_build_const_vector (mode, vect, mask);
10653 return force_reg (vec_mode, v);
10656 /* Generate code for floating point ABS or NEG. */
10659 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10662 rtx mask, set, use, clob, dst, src;
10663 bool matching_memory;
10664 bool use_sse = false;
10665 bool vector_mode = VECTOR_MODE_P (mode);
10666 enum machine_mode elt_mode = mode;
10670 elt_mode = GET_MODE_INNER (mode);
10673 else if (mode == TFmode)
10675 else if (TARGET_SSE_MATH)
10676 use_sse = SSE_FLOAT_MODE_P (mode);
10678 /* NEG and ABS performed with SSE use bitwise mask operations.
10679 Create the appropriate mask now. */
10681 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10688 /* If the destination is memory, and we don't have matching source
10689 operands or we're using the x87, do things in registers. */
10690 matching_memory = false;
10693 if (use_sse && rtx_equal_p (dst, src))
10694 matching_memory = true;
10696 dst = gen_reg_rtx (mode);
10698 if (MEM_P (src) && !matching_memory)
10699 src = force_reg (mode, src);
10703 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10704 set = gen_rtx_SET (VOIDmode, dst, set);
10709 set = gen_rtx_fmt_e (code, mode, src);
10710 set = gen_rtx_SET (VOIDmode, dst, set);
10713 use = gen_rtx_USE (VOIDmode, mask);
10714 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10715 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10716 gen_rtvec (3, set, use, clob)));
10722 if (dst != operands[0])
10723 emit_move_insn (operands[0], dst);
10726 /* Expand a copysign operation. Special case operand 0 being a constant. */
10729 ix86_expand_copysign (rtx operands[])
10731 enum machine_mode mode, vmode;
10732 rtx dest, op0, op1, mask, nmask;
10734 dest = operands[0];
10738 mode = GET_MODE (dest);
10739 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10741 if (GET_CODE (op0) == CONST_DOUBLE)
10743 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
10745 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10746 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10748 if (mode == SFmode || mode == DFmode)
10750 if (op0 == CONST0_RTX (mode))
10751 op0 = CONST0_RTX (vmode);
10756 if (mode == SFmode)
10757 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10758 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10760 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10761 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10765 mask = ix86_build_signbit_mask (mode, 0, 0);
10767 if (mode == SFmode)
10768 copysign_insn = gen_copysignsf3_const;
10769 else if (mode == DFmode)
10770 copysign_insn = gen_copysigndf3_const;
10772 copysign_insn = gen_copysigntf3_const;
10774 emit_insn (copysign_insn (dest, op0, op1, mask));
10778 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
10780 nmask = ix86_build_signbit_mask (mode, 0, 1);
10781 mask = ix86_build_signbit_mask (mode, 0, 0);
10783 if (mode == SFmode)
10784 copysign_insn = gen_copysignsf3_var;
10785 else if (mode == DFmode)
10786 copysign_insn = gen_copysigndf3_var;
10788 copysign_insn = gen_copysigntf3_var;
10790 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
10794 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10795 be a constant, and so has already been expanded into a vector constant. */
10798 ix86_split_copysign_const (rtx operands[])
10800 enum machine_mode mode, vmode;
10801 rtx dest, op0, op1, mask, x;
10803 dest = operands[0];
10806 mask = operands[3];
10808 mode = GET_MODE (dest);
10809 vmode = GET_MODE (mask);
10811 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10812 x = gen_rtx_AND (vmode, dest, mask);
10813 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10815 if (op0 != CONST0_RTX (vmode))
10817 x = gen_rtx_IOR (vmode, dest, op0);
10818 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10822 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10823 so we have to do two masks. */
10826 ix86_split_copysign_var (rtx operands[])
10828 enum machine_mode mode, vmode;
10829 rtx dest, scratch, op0, op1, mask, nmask, x;
10831 dest = operands[0];
10832 scratch = operands[1];
10835 nmask = operands[4];
10836 mask = operands[5];
10838 mode = GET_MODE (dest);
10839 vmode = GET_MODE (mask);
10841 if (rtx_equal_p (op0, op1))
10843 /* Shouldn't happen often (it's useless, obviously), but when it does
10844 we'd generate incorrect code if we continue below. */
10845 emit_move_insn (dest, op0);
10849 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10851 gcc_assert (REGNO (op1) == REGNO (scratch));
10853 x = gen_rtx_AND (vmode, scratch, mask);
10854 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10857 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10858 x = gen_rtx_NOT (vmode, dest);
10859 x = gen_rtx_AND (vmode, x, op0);
10860 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10864 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10866 x = gen_rtx_AND (vmode, scratch, mask);
10868 else /* alternative 2,4 */
10870 gcc_assert (REGNO (mask) == REGNO (scratch));
10871 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10872 x = gen_rtx_AND (vmode, scratch, op1);
10874 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10876 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10878 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10879 x = gen_rtx_AND (vmode, dest, nmask);
10881 else /* alternative 3,4 */
10883 gcc_assert (REGNO (nmask) == REGNO (dest));
10885 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10886 x = gen_rtx_AND (vmode, dest, op0);
10888 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10891 x = gen_rtx_IOR (vmode, dest, scratch);
10892 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10895 /* Return TRUE or FALSE depending on whether the first SET in INSN
10896 has source and destination with matching CC modes, and that the
10897 CC mode is at least as constrained as REQ_MODE. */
10900 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10903 enum machine_mode set_mode;
10905 set = PATTERN (insn);
10906 if (GET_CODE (set) == PARALLEL)
10907 set = XVECEXP (set, 0, 0);
10908 gcc_assert (GET_CODE (set) == SET);
10909 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10911 set_mode = GET_MODE (SET_DEST (set));
10915 if (req_mode != CCNOmode
10916 && (req_mode != CCmode
10917 || XEXP (SET_SRC (set), 1) != const0_rtx))
10921 if (req_mode == CCGCmode)
10925 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10929 if (req_mode == CCZmode)
10936 gcc_unreachable ();
10939 return (GET_MODE (SET_SRC (set)) == set_mode);
10942 /* Generate insn patterns to do an integer compare of OPERANDS. */
10945 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10947 enum machine_mode cmpmode;
10950 cmpmode = SELECT_CC_MODE (code, op0, op1);
10951 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10953 /* This is very simple, but making the interface the same as in the
10954 FP case makes the rest of the code easier. */
10955 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10956 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10958 /* Return the test that should be put into the flags user, i.e.
10959 the bcc, scc, or cmov instruction. */
10960 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10963 /* Figure out whether to use ordered or unordered fp comparisons.
10964 Return the appropriate mode to use. */
10967 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10969 /* ??? In order to make all comparisons reversible, we do all comparisons
10970 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10971 all forms trapping and nontrapping comparisons, we can make inequality
10972 comparisons trapping again, since it results in better code when using
10973 FCOM based compares. */
10974 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10978 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10980 enum machine_mode mode = GET_MODE (op0);
10982 if (SCALAR_FLOAT_MODE_P (mode))
10984 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10985 return ix86_fp_compare_mode (code);
10990 /* Only zero flag is needed. */
10991 case EQ: /* ZF=0 */
10992 case NE: /* ZF!=0 */
10994 /* Codes needing carry flag. */
10995 case GEU: /* CF=0 */
10996 case GTU: /* CF=0 & ZF=0 */
10997 case LTU: /* CF=1 */
10998 case LEU: /* CF=1 | ZF=1 */
11000 /* Codes possibly doable only with sign flag when
11001 comparing against zero. */
11002 case GE: /* SF=OF or SF=0 */
11003 case LT: /* SF<>OF or SF=1 */
11004 if (op1 == const0_rtx)
11007 /* For other cases Carry flag is not required. */
11009 /* Codes doable only with sign flag when comparing
11010 against zero, but we miss jump instruction for it
11011 so we need to use relational tests against overflow
11012 that thus needs to be zero. */
11013 case GT: /* ZF=0 & SF=OF */
11014 case LE: /* ZF=1 | SF<>OF */
11015 if (op1 == const0_rtx)
11019 /* strcmp pattern do (use flags) and combine may ask us for proper
11024 gcc_unreachable ();
11028 /* Return the fixed registers used for condition codes. */
11031 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11038 /* If two condition code modes are compatible, return a condition code
11039 mode which is compatible with both. Otherwise, return
11042 static enum machine_mode
11043 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11048 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11051 if ((m1 == CCGCmode && m2 == CCGOCmode)
11052 || (m1 == CCGOCmode && m2 == CCGCmode))
11058 gcc_unreachable ();
11088 /* These are only compatible with themselves, which we already
11094 /* Split comparison code CODE into comparisons we can do using branch
11095 instructions. BYPASS_CODE is comparison code for branch that will
11096 branch around FIRST_CODE and SECOND_CODE. If some of branches
11097 is not required, set value to UNKNOWN.
11098 We never require more than two branches. */
11101 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11102 enum rtx_code *first_code,
11103 enum rtx_code *second_code)
11105 *first_code = code;
11106 *bypass_code = UNKNOWN;
11107 *second_code = UNKNOWN;
11109 /* The fcomi comparison sets flags as follows:
11119 case GT: /* GTU - CF=0 & ZF=0 */
11120 case GE: /* GEU - CF=0 */
11121 case ORDERED: /* PF=0 */
11122 case UNORDERED: /* PF=1 */
11123 case UNEQ: /* EQ - ZF=1 */
11124 case UNLT: /* LTU - CF=1 */
11125 case UNLE: /* LEU - CF=1 | ZF=1 */
11126 case LTGT: /* EQ - ZF=0 */
11128 case LT: /* LTU - CF=1 - fails on unordered */
11129 *first_code = UNLT;
11130 *bypass_code = UNORDERED;
11132 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11133 *first_code = UNLE;
11134 *bypass_code = UNORDERED;
11136 case EQ: /* EQ - ZF=1 - fails on unordered */
11137 *first_code = UNEQ;
11138 *bypass_code = UNORDERED;
11140 case NE: /* NE - ZF=0 - fails on unordered */
11141 *first_code = LTGT;
11142 *second_code = UNORDERED;
11144 case UNGE: /* GEU - CF=0 - fails on unordered */
11146 *second_code = UNORDERED;
11148 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11150 *second_code = UNORDERED;
11153 gcc_unreachable ();
11155 if (!TARGET_IEEE_FP)
11157 *second_code = UNKNOWN;
11158 *bypass_code = UNKNOWN;
11162 /* Return cost of comparison done fcom + arithmetics operations on AX.
11163 All following functions do use number of instructions as a cost metrics.
11164 In future this should be tweaked to compute bytes for optimize_size and
11165 take into account performance of various instructions on various CPUs. */
11167 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11169 if (!TARGET_IEEE_FP)
11171 /* The cost of code output by ix86_expand_fp_compare. */
11195 gcc_unreachable ();
11199 /* Return cost of comparison done using fcomi operation.
11200 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11202 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11204 enum rtx_code bypass_code, first_code, second_code;
11205 /* Return arbitrarily high cost when instruction is not supported - this
11206 prevents gcc from using it. */
11209 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11210 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11213 /* Return cost of comparison done using sahf operation.
11214 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11216 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11218 enum rtx_code bypass_code, first_code, second_code;
11219 /* Return arbitrarily high cost when instruction is not preferred - this
11220 avoids gcc from using it. */
11221 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11223 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11224 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11227 /* Compute cost of the comparison done using any method.
11228 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11230 ix86_fp_comparison_cost (enum rtx_code code)
11232 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11235 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11236 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11238 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11239 if (min > sahf_cost)
11241 if (min > fcomi_cost)
11246 /* Return true if we should use an FCOMI instruction for this
11250 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11252 enum rtx_code swapped_code = swap_condition (code);
11254 return ((ix86_fp_comparison_cost (code)
11255 == ix86_fp_comparison_fcomi_cost (code))
11256 || (ix86_fp_comparison_cost (swapped_code)
11257 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11260 /* Swap, force into registers, or otherwise massage the two operands
11261 to a fp comparison. The operands are updated in place; the new
11262 comparison code is returned. */
11264 static enum rtx_code
11265 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11267 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11268 rtx op0 = *pop0, op1 = *pop1;
11269 enum machine_mode op_mode = GET_MODE (op0);
11270 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11272 /* All of the unordered compare instructions only work on registers.
11273 The same is true of the fcomi compare instructions. The XFmode
11274 compare instructions require registers except when comparing
11275 against zero or when converting operand 1 from fixed point to
11279 && (fpcmp_mode == CCFPUmode
11280 || (op_mode == XFmode
11281 && ! (standard_80387_constant_p (op0) == 1
11282 || standard_80387_constant_p (op1) == 1)
11283 && GET_CODE (op1) != FLOAT)
11284 || ix86_use_fcomi_compare (code)))
11286 op0 = force_reg (op_mode, op0);
11287 op1 = force_reg (op_mode, op1);
11291 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11292 things around if they appear profitable, otherwise force op0
11293 into a register. */
11295 if (standard_80387_constant_p (op0) == 0
11297 && ! (standard_80387_constant_p (op1) == 0
11301 tmp = op0, op0 = op1, op1 = tmp;
11302 code = swap_condition (code);
11306 op0 = force_reg (op_mode, op0);
11308 if (CONSTANT_P (op1))
11310 int tmp = standard_80387_constant_p (op1);
11312 op1 = validize_mem (force_const_mem (op_mode, op1));
11316 op1 = force_reg (op_mode, op1);
11319 op1 = force_reg (op_mode, op1);
11323 /* Try to rearrange the comparison to make it cheaper. */
11324 if (ix86_fp_comparison_cost (code)
11325 > ix86_fp_comparison_cost (swap_condition (code))
11326 && (REG_P (op1) || !no_new_pseudos))
11329 tmp = op0, op0 = op1, op1 = tmp;
11330 code = swap_condition (code);
11332 op0 = force_reg (op_mode, op0);
11340 /* Convert comparison codes we use to represent FP comparison to integer
11341 code that will result in proper branch. Return UNKNOWN if no such code
11345 ix86_fp_compare_code_to_integer (enum rtx_code code)
11374 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11377 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11378 rtx *second_test, rtx *bypass_test)
11380 enum machine_mode fpcmp_mode, intcmp_mode;
11382 int cost = ix86_fp_comparison_cost (code);
11383 enum rtx_code bypass_code, first_code, second_code;
11385 fpcmp_mode = ix86_fp_compare_mode (code);
11386 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11389 *second_test = NULL_RTX;
11391 *bypass_test = NULL_RTX;
11393 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11395 /* Do fcomi/sahf based test when profitable. */
11396 if ((TARGET_CMOVE || TARGET_SAHF)
11397 && (bypass_code == UNKNOWN || bypass_test)
11398 && (second_code == UNKNOWN || second_test)
11399 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11403 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11404 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11410 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11411 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11413 scratch = gen_reg_rtx (HImode);
11414 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11415 emit_insn (gen_x86_sahf_1 (scratch));
11418 /* The FP codes work out to act like unsigned. */
11419 intcmp_mode = fpcmp_mode;
11421 if (bypass_code != UNKNOWN)
11422 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11423 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11425 if (second_code != UNKNOWN)
11426 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11427 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11432 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11433 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11434 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11436 scratch = gen_reg_rtx (HImode);
11437 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11439 /* In the unordered case, we have to check C2 for NaN's, which
11440 doesn't happen to work out to anything nice combination-wise.
11441 So do some bit twiddling on the value we've got in AH to come
11442 up with an appropriate set of condition codes. */
11444 intcmp_mode = CCNOmode;
11449 if (code == GT || !TARGET_IEEE_FP)
11451 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11456 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11457 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11458 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11459 intcmp_mode = CCmode;
11465 if (code == LT && TARGET_IEEE_FP)
11467 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11468 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11469 intcmp_mode = CCmode;
11474 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11480 if (code == GE || !TARGET_IEEE_FP)
11482 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11487 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11488 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11495 if (code == LE && TARGET_IEEE_FP)
11497 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11498 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11499 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11500 intcmp_mode = CCmode;
11505 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11511 if (code == EQ && TARGET_IEEE_FP)
11513 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11514 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11515 intcmp_mode = CCmode;
11520 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11527 if (code == NE && TARGET_IEEE_FP)
11529 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11530 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11536 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11542 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11546 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11551 gcc_unreachable ();
11555 /* Return the test that should be put into the flags user, i.e.
11556 the bcc, scc, or cmov instruction. */
11557 return gen_rtx_fmt_ee (code, VOIDmode,
11558 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11563 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11566 op0 = ix86_compare_op0;
11567 op1 = ix86_compare_op1;
11570 *second_test = NULL_RTX;
11572 *bypass_test = NULL_RTX;
11574 if (ix86_compare_emitted)
11576 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11577 ix86_compare_emitted = NULL_RTX;
11579 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11581 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11582 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11583 second_test, bypass_test);
11586 ret = ix86_expand_int_compare (code, op0, op1);
11591 /* Return true if the CODE will result in nontrivial jump sequence. */
11593 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11595 enum rtx_code bypass_code, first_code, second_code;
11598 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11599 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11603 ix86_expand_branch (enum rtx_code code, rtx label)
11607 /* If we have emitted a compare insn, go straight to simple.
11608 ix86_expand_compare won't emit anything if ix86_compare_emitted
11610 if (ix86_compare_emitted)
11613 switch (GET_MODE (ix86_compare_op0))
11619 tmp = ix86_expand_compare (code, NULL, NULL);
11620 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11621 gen_rtx_LABEL_REF (VOIDmode, label),
11623 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11632 enum rtx_code bypass_code, first_code, second_code;
11634 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11635 &ix86_compare_op1);
11637 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11639 /* Check whether we will use the natural sequence with one jump. If
11640 so, we can expand jump early. Otherwise delay expansion by
11641 creating compound insn to not confuse optimizers. */
11642 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11645 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11646 gen_rtx_LABEL_REF (VOIDmode, label),
11647 pc_rtx, NULL_RTX, NULL_RTX);
11651 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11652 ix86_compare_op0, ix86_compare_op1);
11653 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11654 gen_rtx_LABEL_REF (VOIDmode, label),
11656 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11658 use_fcomi = ix86_use_fcomi_compare (code);
11659 vec = rtvec_alloc (3 + !use_fcomi);
11660 RTVEC_ELT (vec, 0) = tmp;
11662 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11664 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11667 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11669 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11678 /* Expand DImode branch into multiple compare+branch. */
11680 rtx lo[2], hi[2], label2;
11681 enum rtx_code code1, code2, code3;
11682 enum machine_mode submode;
11684 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11686 tmp = ix86_compare_op0;
11687 ix86_compare_op0 = ix86_compare_op1;
11688 ix86_compare_op1 = tmp;
11689 code = swap_condition (code);
11691 if (GET_MODE (ix86_compare_op0) == DImode)
11693 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11694 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11699 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11700 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11704 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11705 avoid two branches. This costs one extra insn, so disable when
11706 optimizing for size. */
11708 if ((code == EQ || code == NE)
11710 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11715 if (hi[1] != const0_rtx)
11716 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11717 NULL_RTX, 0, OPTAB_WIDEN);
11720 if (lo[1] != const0_rtx)
11721 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11722 NULL_RTX, 0, OPTAB_WIDEN);
11724 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11725 NULL_RTX, 0, OPTAB_WIDEN);
11727 ix86_compare_op0 = tmp;
11728 ix86_compare_op1 = const0_rtx;
11729 ix86_expand_branch (code, label);
11733 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11734 op1 is a constant and the low word is zero, then we can just
11735 examine the high word. */
11737 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11740 case LT: case LTU: case GE: case GEU:
11741 ix86_compare_op0 = hi[0];
11742 ix86_compare_op1 = hi[1];
11743 ix86_expand_branch (code, label);
11749 /* Otherwise, we need two or three jumps. */
11751 label2 = gen_label_rtx ();
11754 code2 = swap_condition (code);
11755 code3 = unsigned_condition (code);
11759 case LT: case GT: case LTU: case GTU:
11762 case LE: code1 = LT; code2 = GT; break;
11763 case GE: code1 = GT; code2 = LT; break;
11764 case LEU: code1 = LTU; code2 = GTU; break;
11765 case GEU: code1 = GTU; code2 = LTU; break;
11767 case EQ: code1 = UNKNOWN; code2 = NE; break;
11768 case NE: code2 = UNKNOWN; break;
11771 gcc_unreachable ();
11776 * if (hi(a) < hi(b)) goto true;
11777 * if (hi(a) > hi(b)) goto false;
11778 * if (lo(a) < lo(b)) goto true;
11782 ix86_compare_op0 = hi[0];
11783 ix86_compare_op1 = hi[1];
11785 if (code1 != UNKNOWN)
11786 ix86_expand_branch (code1, label);
11787 if (code2 != UNKNOWN)
11788 ix86_expand_branch (code2, label2);
11790 ix86_compare_op0 = lo[0];
11791 ix86_compare_op1 = lo[1];
11792 ix86_expand_branch (code3, label);
11794 if (code2 != UNKNOWN)
11795 emit_label (label2);
11800 gcc_unreachable ();
11804 /* Split branch based on floating point condition. */
11806 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11807 rtx target1, rtx target2, rtx tmp, rtx pushed)
11809 rtx second, bypass;
11810 rtx label = NULL_RTX;
11812 int bypass_probability = -1, second_probability = -1, probability = -1;
11815 if (target2 != pc_rtx)
11818 code = reverse_condition_maybe_unordered (code);
11823 condition = ix86_expand_fp_compare (code, op1, op2,
11824 tmp, &second, &bypass);
11826 /* Remove pushed operand from stack. */
11828 ix86_free_from_memory (GET_MODE (pushed));
11830 if (split_branch_probability >= 0)
11832 /* Distribute the probabilities across the jumps.
11833 Assume the BYPASS and SECOND to be always test
11835 probability = split_branch_probability;
11837 /* Value of 1 is low enough to make no need for probability
11838 to be updated. Later we may run some experiments and see
11839 if unordered values are more frequent in practice. */
11841 bypass_probability = 1;
11843 second_probability = 1;
11845 if (bypass != NULL_RTX)
11847 label = gen_label_rtx ();
11848 i = emit_jump_insn (gen_rtx_SET
11850 gen_rtx_IF_THEN_ELSE (VOIDmode,
11852 gen_rtx_LABEL_REF (VOIDmode,
11855 if (bypass_probability >= 0)
11857 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11858 GEN_INT (bypass_probability),
11861 i = emit_jump_insn (gen_rtx_SET
11863 gen_rtx_IF_THEN_ELSE (VOIDmode,
11864 condition, target1, target2)));
11865 if (probability >= 0)
11867 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11868 GEN_INT (probability),
11870 if (second != NULL_RTX)
11872 i = emit_jump_insn (gen_rtx_SET
11874 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11876 if (second_probability >= 0)
11878 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11879 GEN_INT (second_probability),
11882 if (label != NULL_RTX)
11883 emit_label (label);
11887 ix86_expand_setcc (enum rtx_code code, rtx dest)
11889 rtx ret, tmp, tmpreg, equiv;
11890 rtx second_test, bypass_test;
11892 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11893 return 0; /* FAIL */
11895 gcc_assert (GET_MODE (dest) == QImode);
11897 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11898 PUT_MODE (ret, QImode);
11903 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11904 if (bypass_test || second_test)
11906 rtx test = second_test;
11908 rtx tmp2 = gen_reg_rtx (QImode);
11911 gcc_assert (!second_test);
11912 test = bypass_test;
11914 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11916 PUT_MODE (test, QImode);
11917 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11920 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11922 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11925 /* Attach a REG_EQUAL note describing the comparison result. */
11926 if (ix86_compare_op0 && ix86_compare_op1)
11928 equiv = simplify_gen_relational (code, QImode,
11929 GET_MODE (ix86_compare_op0),
11930 ix86_compare_op0, ix86_compare_op1);
11931 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11934 return 1; /* DONE */
11937 /* Expand comparison setting or clearing carry flag. Return true when
11938 successful and set pop for the operation. */
11940 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11942 enum machine_mode mode =
11943 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11945 /* Do not handle DImode compares that go through special path.
11946 Also we can't deal with FP compares yet. This is possible to add. */
11947 if (mode == (TARGET_64BIT ? TImode : DImode))
11950 if (SCALAR_FLOAT_MODE_P (mode))
11952 rtx second_test = NULL, bypass_test = NULL;
11953 rtx compare_op, compare_seq;
11955 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11957 /* Shortcut: following common codes never translate
11958 into carry flag compares. */
11959 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11960 || code == ORDERED || code == UNORDERED)
11963 /* These comparisons require zero flag; swap operands so they won't. */
11964 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11965 && !TARGET_IEEE_FP)
11970 code = swap_condition (code);
11973 /* Try to expand the comparison and verify that we end up with carry flag
11974 based comparison. This is fails to be true only when we decide to expand
11975 comparison using arithmetic that is not too common scenario. */
11977 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11978 &second_test, &bypass_test);
11979 compare_seq = get_insns ();
11982 if (second_test || bypass_test)
11984 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11985 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11986 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11988 code = GET_CODE (compare_op);
11989 if (code != LTU && code != GEU)
11991 emit_insn (compare_seq);
11995 if (!INTEGRAL_MODE_P (mode))
12003 /* Convert a==0 into (unsigned)a<1. */
12006 if (op1 != const0_rtx)
12009 code = (code == EQ ? LTU : GEU);
12012 /* Convert a>b into b<a or a>=b-1. */
12015 if (CONST_INT_P (op1))
12017 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12018 /* Bail out on overflow. We still can swap operands but that
12019 would force loading of the constant into register. */
12020 if (op1 == const0_rtx
12021 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12023 code = (code == GTU ? GEU : LTU);
12030 code = (code == GTU ? LTU : GEU);
12034 /* Convert a>=0 into (unsigned)a<0x80000000. */
12037 if (mode == DImode || op1 != const0_rtx)
12039 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12040 code = (code == LT ? GEU : LTU);
12044 if (mode == DImode || op1 != constm1_rtx)
12046 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12047 code = (code == LE ? GEU : LTU);
12053 /* Swapping operands may cause constant to appear as first operand. */
12054 if (!nonimmediate_operand (op0, VOIDmode))
12056 if (no_new_pseudos)
12058 op0 = force_reg (mode, op0);
12060 ix86_compare_op0 = op0;
12061 ix86_compare_op1 = op1;
12062 *pop = ix86_expand_compare (code, NULL, NULL);
12063 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12068 ix86_expand_int_movcc (rtx operands[])
12070 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12071 rtx compare_seq, compare_op;
12072 rtx second_test, bypass_test;
12073 enum machine_mode mode = GET_MODE (operands[0]);
12074 bool sign_bit_compare_p = false;;
12077 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12078 compare_seq = get_insns ();
12081 compare_code = GET_CODE (compare_op);
12083 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12084 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12085 sign_bit_compare_p = true;
12087 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12088 HImode insns, we'd be swallowed in word prefix ops. */
12090 if ((mode != HImode || TARGET_FAST_PREFIX)
12091 && (mode != (TARGET_64BIT ? TImode : DImode))
12092 && CONST_INT_P (operands[2])
12093 && CONST_INT_P (operands[3]))
12095 rtx out = operands[0];
12096 HOST_WIDE_INT ct = INTVAL (operands[2]);
12097 HOST_WIDE_INT cf = INTVAL (operands[3]);
12098 HOST_WIDE_INT diff;
12101 /* Sign bit compares are better done using shifts than we do by using
12103 if (sign_bit_compare_p
12104 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12105 ix86_compare_op1, &compare_op))
12107 /* Detect overlap between destination and compare sources. */
12110 if (!sign_bit_compare_p)
12112 bool fpcmp = false;
12114 compare_code = GET_CODE (compare_op);
12116 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12117 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12120 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12123 /* To simplify rest of code, restrict to the GEU case. */
12124 if (compare_code == LTU)
12126 HOST_WIDE_INT tmp = ct;
12129 compare_code = reverse_condition (compare_code);
12130 code = reverse_condition (code);
12135 PUT_CODE (compare_op,
12136 reverse_condition_maybe_unordered
12137 (GET_CODE (compare_op)));
12139 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12143 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12144 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12145 tmp = gen_reg_rtx (mode);
12147 if (mode == DImode)
12148 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12150 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12154 if (code == GT || code == GE)
12155 code = reverse_condition (code);
12158 HOST_WIDE_INT tmp = ct;
12163 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12164 ix86_compare_op1, VOIDmode, 0, -1);
12177 tmp = expand_simple_binop (mode, PLUS,
12179 copy_rtx (tmp), 1, OPTAB_DIRECT);
12190 tmp = expand_simple_binop (mode, IOR,
12192 copy_rtx (tmp), 1, OPTAB_DIRECT);
12194 else if (diff == -1 && ct)
12204 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12206 tmp = expand_simple_binop (mode, PLUS,
12207 copy_rtx (tmp), GEN_INT (cf),
12208 copy_rtx (tmp), 1, OPTAB_DIRECT);
12216 * andl cf - ct, dest
12226 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12229 tmp = expand_simple_binop (mode, AND,
12231 gen_int_mode (cf - ct, mode),
12232 copy_rtx (tmp), 1, OPTAB_DIRECT);
12234 tmp = expand_simple_binop (mode, PLUS,
12235 copy_rtx (tmp), GEN_INT (ct),
12236 copy_rtx (tmp), 1, OPTAB_DIRECT);
12239 if (!rtx_equal_p (tmp, out))
12240 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12242 return 1; /* DONE */
12247 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12250 tmp = ct, ct = cf, cf = tmp;
12253 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12255 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12257 /* We may be reversing unordered compare to normal compare, that
12258 is not valid in general (we may convert non-trapping condition
12259 to trapping one), however on i386 we currently emit all
12260 comparisons unordered. */
12261 compare_code = reverse_condition_maybe_unordered (compare_code);
12262 code = reverse_condition_maybe_unordered (code);
12266 compare_code = reverse_condition (compare_code);
12267 code = reverse_condition (code);
12271 compare_code = UNKNOWN;
12272 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12273 && CONST_INT_P (ix86_compare_op1))
12275 if (ix86_compare_op1 == const0_rtx
12276 && (code == LT || code == GE))
12277 compare_code = code;
12278 else if (ix86_compare_op1 == constm1_rtx)
12282 else if (code == GT)
12287 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12288 if (compare_code != UNKNOWN
12289 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12290 && (cf == -1 || ct == -1))
12292 /* If lea code below could be used, only optimize
12293 if it results in a 2 insn sequence. */
12295 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12296 || diff == 3 || diff == 5 || diff == 9)
12297 || (compare_code == LT && ct == -1)
12298 || (compare_code == GE && cf == -1))
12301 * notl op1 (if necessary)
12309 code = reverse_condition (code);
12312 out = emit_store_flag (out, code, ix86_compare_op0,
12313 ix86_compare_op1, VOIDmode, 0, -1);
12315 out = expand_simple_binop (mode, IOR,
12317 out, 1, OPTAB_DIRECT);
12318 if (out != operands[0])
12319 emit_move_insn (operands[0], out);
12321 return 1; /* DONE */
12326 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12327 || diff == 3 || diff == 5 || diff == 9)
12328 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12330 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12336 * lea cf(dest*(ct-cf)),dest
12340 * This also catches the degenerate setcc-only case.
12346 out = emit_store_flag (out, code, ix86_compare_op0,
12347 ix86_compare_op1, VOIDmode, 0, 1);
12350 /* On x86_64 the lea instruction operates on Pmode, so we need
12351 to get arithmetics done in proper mode to match. */
12353 tmp = copy_rtx (out);
12357 out1 = copy_rtx (out);
12358 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12362 tmp = gen_rtx_PLUS (mode, tmp, out1);
12368 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12371 if (!rtx_equal_p (tmp, out))
12374 out = force_operand (tmp, copy_rtx (out));
12376 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12378 if (!rtx_equal_p (out, operands[0]))
12379 emit_move_insn (operands[0], copy_rtx (out));
12381 return 1; /* DONE */
12385 * General case: Jumpful:
12386 * xorl dest,dest cmpl op1, op2
12387 * cmpl op1, op2 movl ct, dest
12388 * setcc dest jcc 1f
12389 * decl dest movl cf, dest
12390 * andl (cf-ct),dest 1:
12393 * Size 20. Size 14.
12395 * This is reasonably steep, but branch mispredict costs are
12396 * high on modern cpus, so consider failing only if optimizing
12400 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12401 && BRANCH_COST >= 2)
12405 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12410 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12412 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12414 /* We may be reversing unordered compare to normal compare,
12415 that is not valid in general (we may convert non-trapping
12416 condition to trapping one), however on i386 we currently
12417 emit all comparisons unordered. */
12418 code = reverse_condition_maybe_unordered (code);
12422 code = reverse_condition (code);
12423 if (compare_code != UNKNOWN)
12424 compare_code = reverse_condition (compare_code);
12428 if (compare_code != UNKNOWN)
12430 /* notl op1 (if needed)
12435 For x < 0 (resp. x <= -1) there will be no notl,
12436 so if possible swap the constants to get rid of the
12438 True/false will be -1/0 while code below (store flag
12439 followed by decrement) is 0/-1, so the constants need
12440 to be exchanged once more. */
12442 if (compare_code == GE || !cf)
12444 code = reverse_condition (code);
12449 HOST_WIDE_INT tmp = cf;
12454 out = emit_store_flag (out, code, ix86_compare_op0,
12455 ix86_compare_op1, VOIDmode, 0, -1);
12459 out = emit_store_flag (out, code, ix86_compare_op0,
12460 ix86_compare_op1, VOIDmode, 0, 1);
12462 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12463 copy_rtx (out), 1, OPTAB_DIRECT);
12466 out = expand_simple_binop (mode, AND, copy_rtx (out),
12467 gen_int_mode (cf - ct, mode),
12468 copy_rtx (out), 1, OPTAB_DIRECT);
12470 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12471 copy_rtx (out), 1, OPTAB_DIRECT);
12472 if (!rtx_equal_p (out, operands[0]))
12473 emit_move_insn (operands[0], copy_rtx (out));
12475 return 1; /* DONE */
12479 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12481 /* Try a few things more with specific constants and a variable. */
12484 rtx var, orig_out, out, tmp;
12486 if (BRANCH_COST <= 2)
12487 return 0; /* FAIL */
12489 /* If one of the two operands is an interesting constant, load a
12490 constant with the above and mask it in with a logical operation. */
12492 if (CONST_INT_P (operands[2]))
12495 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12496 operands[3] = constm1_rtx, op = and_optab;
12497 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12498 operands[3] = const0_rtx, op = ior_optab;
12500 return 0; /* FAIL */
12502 else if (CONST_INT_P (operands[3]))
12505 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12506 operands[2] = constm1_rtx, op = and_optab;
12507 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12508 operands[2] = const0_rtx, op = ior_optab;
12510 return 0; /* FAIL */
12513 return 0; /* FAIL */
12515 orig_out = operands[0];
12516 tmp = gen_reg_rtx (mode);
12519 /* Recurse to get the constant loaded. */
12520 if (ix86_expand_int_movcc (operands) == 0)
12521 return 0; /* FAIL */
12523 /* Mask in the interesting variable. */
12524 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12526 if (!rtx_equal_p (out, orig_out))
12527 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12529 return 1; /* DONE */
12533 * For comparison with above,
12543 if (! nonimmediate_operand (operands[2], mode))
12544 operands[2] = force_reg (mode, operands[2]);
12545 if (! nonimmediate_operand (operands[3], mode))
12546 operands[3] = force_reg (mode, operands[3]);
12548 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12550 rtx tmp = gen_reg_rtx (mode);
12551 emit_move_insn (tmp, operands[3]);
12554 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12556 rtx tmp = gen_reg_rtx (mode);
12557 emit_move_insn (tmp, operands[2]);
12561 if (! register_operand (operands[2], VOIDmode)
12563 || ! register_operand (operands[3], VOIDmode)))
12564 operands[2] = force_reg (mode, operands[2]);
12567 && ! register_operand (operands[3], VOIDmode))
12568 operands[3] = force_reg (mode, operands[3]);
12570 emit_insn (compare_seq);
12571 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12572 gen_rtx_IF_THEN_ELSE (mode,
12573 compare_op, operands[2],
12576 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12577 gen_rtx_IF_THEN_ELSE (mode,
12579 copy_rtx (operands[3]),
12580 copy_rtx (operands[0]))));
12582 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12583 gen_rtx_IF_THEN_ELSE (mode,
12585 copy_rtx (operands[2]),
12586 copy_rtx (operands[0]))));
12588 return 1; /* DONE */
12591 /* Swap, force into registers, or otherwise massage the two operands
12592 to an sse comparison with a mask result. Thus we differ a bit from
12593 ix86_prepare_fp_compare_args which expects to produce a flags result.
12595 The DEST operand exists to help determine whether to commute commutative
12596 operators. The POP0/POP1 operands are updated in place. The new
12597 comparison code is returned, or UNKNOWN if not implementable. */
12599 static enum rtx_code
12600 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12601 rtx *pop0, rtx *pop1)
12609 /* We have no LTGT as an operator. We could implement it with
12610 NE & ORDERED, but this requires an extra temporary. It's
12611 not clear that it's worth it. */
12618 /* These are supported directly. */
12625 /* For commutative operators, try to canonicalize the destination
12626 operand to be first in the comparison - this helps reload to
12627 avoid extra moves. */
12628 if (!dest || !rtx_equal_p (dest, *pop1))
12636 /* These are not supported directly. Swap the comparison operands
12637 to transform into something that is supported. */
12641 code = swap_condition (code);
12645 gcc_unreachable ();
12651 /* Detect conditional moves that exactly match min/max operational
12652 semantics. Note that this is IEEE safe, as long as we don't
12653 interchange the operands.
12655 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12656 and TRUE if the operation is successful and instructions are emitted. */
12659 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12660 rtx cmp_op1, rtx if_true, rtx if_false)
12662 enum machine_mode mode;
12668 else if (code == UNGE)
12671 if_true = if_false;
12677 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12679 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12684 mode = GET_MODE (dest);
12686 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12687 but MODE may be a vector mode and thus not appropriate. */
12688 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12690 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12693 if_true = force_reg (mode, if_true);
12694 v = gen_rtvec (2, if_true, if_false);
12695 tmp = gen_rtx_UNSPEC (mode, v, u);
12699 code = is_min ? SMIN : SMAX;
12700 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12703 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12707 /* Expand an sse vector comparison. Return the register with the result. */
12710 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12711 rtx op_true, rtx op_false)
12713 enum machine_mode mode = GET_MODE (dest);
12716 cmp_op0 = force_reg (mode, cmp_op0);
12717 if (!nonimmediate_operand (cmp_op1, mode))
12718 cmp_op1 = force_reg (mode, cmp_op1);
12721 || reg_overlap_mentioned_p (dest, op_true)
12722 || reg_overlap_mentioned_p (dest, op_false))
12723 dest = gen_reg_rtx (mode);
12725 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12726 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12731 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12732 operations. This is used for both scalar and vector conditional moves. */
12735 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12737 enum machine_mode mode = GET_MODE (dest);
12740 if (op_false == CONST0_RTX (mode))
12742 op_true = force_reg (mode, op_true);
12743 x = gen_rtx_AND (mode, cmp, op_true);
12744 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12746 else if (op_true == CONST0_RTX (mode))
12748 op_false = force_reg (mode, op_false);
12749 x = gen_rtx_NOT (mode, cmp);
12750 x = gen_rtx_AND (mode, x, op_false);
12751 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12755 op_true = force_reg (mode, op_true);
12756 op_false = force_reg (mode, op_false);
12758 t2 = gen_reg_rtx (mode);
12760 t3 = gen_reg_rtx (mode);
12764 x = gen_rtx_AND (mode, op_true, cmp);
12765 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12767 x = gen_rtx_NOT (mode, cmp);
12768 x = gen_rtx_AND (mode, x, op_false);
12769 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12771 x = gen_rtx_IOR (mode, t3, t2);
12772 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12776 /* Expand a floating-point conditional move. Return true if successful. */
12779 ix86_expand_fp_movcc (rtx operands[])
12781 enum machine_mode mode = GET_MODE (operands[0]);
12782 enum rtx_code code = GET_CODE (operands[1]);
12783 rtx tmp, compare_op, second_test, bypass_test;
12785 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12787 enum machine_mode cmode;
12789 /* Since we've no cmove for sse registers, don't force bad register
12790 allocation just to gain access to it. Deny movcc when the
12791 comparison mode doesn't match the move mode. */
12792 cmode = GET_MODE (ix86_compare_op0);
12793 if (cmode == VOIDmode)
12794 cmode = GET_MODE (ix86_compare_op1);
12798 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12800 &ix86_compare_op1);
12801 if (code == UNKNOWN)
12804 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12805 ix86_compare_op1, operands[2],
12809 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12810 ix86_compare_op1, operands[2], operands[3]);
12811 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12815 /* The floating point conditional move instructions don't directly
12816 support conditions resulting from a signed integer comparison. */
12818 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12820 /* The floating point conditional move instructions don't directly
12821 support signed integer comparisons. */
12823 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12825 gcc_assert (!second_test && !bypass_test);
12826 tmp = gen_reg_rtx (QImode);
12827 ix86_expand_setcc (code, tmp);
12829 ix86_compare_op0 = tmp;
12830 ix86_compare_op1 = const0_rtx;
12831 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12833 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12835 tmp = gen_reg_rtx (mode);
12836 emit_move_insn (tmp, operands[3]);
12839 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12841 tmp = gen_reg_rtx (mode);
12842 emit_move_insn (tmp, operands[2]);
12846 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12847 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12848 operands[2], operands[3])));
12850 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12851 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12852 operands[3], operands[0])));
12854 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12855 gen_rtx_IF_THEN_ELSE (mode, second_test,
12856 operands[2], operands[0])));
12861 /* Expand a floating-point vector conditional move; a vcond operation
12862 rather than a movcc operation. */
12865 ix86_expand_fp_vcond (rtx operands[])
12867 enum rtx_code code = GET_CODE (operands[3]);
12870 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12871 &operands[4], &operands[5]);
12872 if (code == UNKNOWN)
12875 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12876 operands[5], operands[1], operands[2]))
12879 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12880 operands[1], operands[2]);
12881 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12885 /* Expand a signed/unsigned integral vector conditional move. */
12888 ix86_expand_int_vcond (rtx operands[])
12890 enum machine_mode mode = GET_MODE (operands[0]);
12891 enum rtx_code code = GET_CODE (operands[3]);
12892 bool negate = false;
12895 cop0 = operands[4];
12896 cop1 = operands[5];
12898 /* Canonicalize the comparison to EQ, GT, GTU. */
12909 code = reverse_condition (code);
12915 code = reverse_condition (code);
12921 code = swap_condition (code);
12922 x = cop0, cop0 = cop1, cop1 = x;
12926 gcc_unreachable ();
12929 /* Only SSE4.1/SSE4.2 supports V2DImode. */
12930 if (mode == V2DImode)
12935 /* SSE4.1 supports EQ. */
12936 if (!TARGET_SSE4_1)
12942 /* SSE4.2 supports GT/GTU. */
12943 if (!TARGET_SSE4_2)
12948 gcc_unreachable ();
12952 /* Unsigned parallel compare is not supported by the hardware. Play some
12953 tricks to turn this into a signed comparison against 0. */
12956 cop0 = force_reg (mode, cop0);
12965 /* Perform a parallel modulo subtraction. */
12966 t1 = gen_reg_rtx (mode);
12967 emit_insn ((mode == V4SImode
12969 : gen_subv2di3) (t1, cop0, cop1));
12971 /* Extract the original sign bit of op0. */
12972 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
12974 t2 = gen_reg_rtx (mode);
12975 emit_insn ((mode == V4SImode
12977 : gen_andv2di3) (t2, cop0, mask));
12979 /* XOR it back into the result of the subtraction. This results
12980 in the sign bit set iff we saw unsigned underflow. */
12981 x = gen_reg_rtx (mode);
12982 emit_insn ((mode == V4SImode
12984 : gen_xorv2di3) (x, t1, t2));
12992 /* Perform a parallel unsigned saturating subtraction. */
12993 x = gen_reg_rtx (mode);
12994 emit_insn (gen_rtx_SET (VOIDmode, x,
12995 gen_rtx_US_MINUS (mode, cop0, cop1)));
13002 gcc_unreachable ();
13006 cop1 = CONST0_RTX (mode);
13009 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13010 operands[1+negate], operands[2-negate]);
13012 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13013 operands[2-negate]);
13017 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13018 true if we should do zero extension, else sign extension. HIGH_P is
13019 true if we want the N/2 high elements, else the low elements. */
13022 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13024 enum machine_mode imode = GET_MODE (operands[1]);
13025 rtx (*unpack)(rtx, rtx, rtx);
13032 unpack = gen_vec_interleave_highv16qi;
13034 unpack = gen_vec_interleave_lowv16qi;
13038 unpack = gen_vec_interleave_highv8hi;
13040 unpack = gen_vec_interleave_lowv8hi;
13044 unpack = gen_vec_interleave_highv4si;
13046 unpack = gen_vec_interleave_lowv4si;
13049 gcc_unreachable ();
13052 dest = gen_lowpart (imode, operands[0]);
13055 se = force_reg (imode, CONST0_RTX (imode));
13057 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13058 operands[1], pc_rtx, pc_rtx);
13060 emit_insn (unpack (dest, operands[1], se));
13063 /* This function performs the same task as ix86_expand_sse_unpack,
13064 but with SSE4.1 instructions. */
13067 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13069 enum machine_mode imode = GET_MODE (operands[1]);
13070 rtx (*unpack)(rtx, rtx);
13077 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13079 unpack = gen_sse4_1_extendv8qiv8hi2;
13083 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13085 unpack = gen_sse4_1_extendv4hiv4si2;
13089 unpack = gen_sse4_1_zero_extendv2siv2di2;
13091 unpack = gen_sse4_1_extendv2siv2di2;
13094 gcc_unreachable ();
13097 dest = operands[0];
13100 /* Shift higher 8 bytes to lower 8 bytes. */
13101 src = gen_reg_rtx (imode);
13102 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13103 gen_lowpart (TImode, operands[1]),
13109 emit_insn (unpack (dest, src));
13112 /* Expand conditional increment or decrement using adb/sbb instructions.
13113 The default case using setcc followed by the conditional move can be
13114 done by generic code. */
13116 ix86_expand_int_addcc (rtx operands[])
13118 enum rtx_code code = GET_CODE (operands[1]);
13120 rtx val = const0_rtx;
13121 bool fpcmp = false;
13122 enum machine_mode mode = GET_MODE (operands[0]);
13124 if (operands[3] != const1_rtx
13125 && operands[3] != constm1_rtx)
13127 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13128 ix86_compare_op1, &compare_op))
13130 code = GET_CODE (compare_op);
13132 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13133 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13136 code = ix86_fp_compare_code_to_integer (code);
13143 PUT_CODE (compare_op,
13144 reverse_condition_maybe_unordered
13145 (GET_CODE (compare_op)));
13147 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13149 PUT_MODE (compare_op, mode);
13151 /* Construct either adc or sbb insn. */
13152 if ((code == LTU) == (operands[3] == constm1_rtx))
13154 switch (GET_MODE (operands[0]))
13157 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13160 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13163 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13166 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13169 gcc_unreachable ();
13174 switch (GET_MODE (operands[0]))
13177 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13180 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13183 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13186 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13189 gcc_unreachable ();
13192 return 1; /* DONE */
13196 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13197 works for floating pointer parameters and nonoffsetable memories.
13198 For pushes, it returns just stack offsets; the values will be saved
13199 in the right order. Maximally three parts are generated. */
13202 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13207 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13209 size = (GET_MODE_SIZE (mode) + 4) / 8;
13211 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13212 gcc_assert (size >= 2 && size <= 3);
13214 /* Optimize constant pool reference to immediates. This is used by fp
13215 moves, that force all constants to memory to allow combining. */
13216 if (MEM_P (operand) && MEM_READONLY_P (operand))
13218 rtx tmp = maybe_get_pool_constant (operand);
13223 if (MEM_P (operand) && !offsettable_memref_p (operand))
13225 /* The only non-offsetable memories we handle are pushes. */
13226 int ok = push_operand (operand, VOIDmode);
13230 operand = copy_rtx (operand);
13231 PUT_MODE (operand, Pmode);
13232 parts[0] = parts[1] = parts[2] = operand;
13236 if (GET_CODE (operand) == CONST_VECTOR)
13238 enum machine_mode imode = int_mode_for_mode (mode);
13239 /* Caution: if we looked through a constant pool memory above,
13240 the operand may actually have a different mode now. That's
13241 ok, since we want to pun this all the way back to an integer. */
13242 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13243 gcc_assert (operand != NULL);
13249 if (mode == DImode)
13250 split_di (&operand, 1, &parts[0], &parts[1]);
13253 if (REG_P (operand))
13255 gcc_assert (reload_completed);
13256 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13257 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13259 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13261 else if (offsettable_memref_p (operand))
13263 operand = adjust_address (operand, SImode, 0);
13264 parts[0] = operand;
13265 parts[1] = adjust_address (operand, SImode, 4);
13267 parts[2] = adjust_address (operand, SImode, 8);
13269 else if (GET_CODE (operand) == CONST_DOUBLE)
13274 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13278 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13279 parts[2] = gen_int_mode (l[2], SImode);
13282 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13285 gcc_unreachable ();
13287 parts[1] = gen_int_mode (l[1], SImode);
13288 parts[0] = gen_int_mode (l[0], SImode);
13291 gcc_unreachable ();
13296 if (mode == TImode)
13297 split_ti (&operand, 1, &parts[0], &parts[1]);
13298 if (mode == XFmode || mode == TFmode)
13300 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13301 if (REG_P (operand))
13303 gcc_assert (reload_completed);
13304 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13305 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13307 else if (offsettable_memref_p (operand))
13309 operand = adjust_address (operand, DImode, 0);
13310 parts[0] = operand;
13311 parts[1] = adjust_address (operand, upper_mode, 8);
13313 else if (GET_CODE (operand) == CONST_DOUBLE)
13318 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13319 real_to_target (l, &r, mode);
13321 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13322 if (HOST_BITS_PER_WIDE_INT >= 64)
13325 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13326 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13329 parts[0] = immed_double_const (l[0], l[1], DImode);
13331 if (upper_mode == SImode)
13332 parts[1] = gen_int_mode (l[2], SImode);
13333 else if (HOST_BITS_PER_WIDE_INT >= 64)
13336 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13337 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13340 parts[1] = immed_double_const (l[2], l[3], DImode);
13343 gcc_unreachable ();
13350 /* Emit insns to perform a move or push of DI, DF, and XF values.
13351 Return false when normal moves are needed; true when all required
13352 insns have been emitted. Operands 2-4 contain the input values
13353 int the correct order; operands 5-7 contain the output values. */
13356 ix86_split_long_move (rtx operands[])
13361 int collisions = 0;
13362 enum machine_mode mode = GET_MODE (operands[0]);
13364 /* The DFmode expanders may ask us to move double.
13365 For 64bit target this is single move. By hiding the fact
13366 here we simplify i386.md splitters. */
13367 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13369 /* Optimize constant pool reference to immediates. This is used by
13370 fp moves, that force all constants to memory to allow combining. */
13372 if (MEM_P (operands[1])
13373 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13374 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13375 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13376 if (push_operand (operands[0], VOIDmode))
13378 operands[0] = copy_rtx (operands[0]);
13379 PUT_MODE (operands[0], Pmode);
13382 operands[0] = gen_lowpart (DImode, operands[0]);
13383 operands[1] = gen_lowpart (DImode, operands[1]);
13384 emit_move_insn (operands[0], operands[1]);
13388 /* The only non-offsettable memory we handle is push. */
13389 if (push_operand (operands[0], VOIDmode))
13392 gcc_assert (!MEM_P (operands[0])
13393 || offsettable_memref_p (operands[0]));
13395 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13396 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13398 /* When emitting push, take care for source operands on the stack. */
13399 if (push && MEM_P (operands[1])
13400 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13403 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13404 XEXP (part[1][2], 0));
13405 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13406 XEXP (part[1][1], 0));
13409 /* We need to do copy in the right order in case an address register
13410 of the source overlaps the destination. */
13411 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13413 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13415 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13418 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13421 /* Collision in the middle part can be handled by reordering. */
13422 if (collisions == 1 && nparts == 3
13423 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13426 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13427 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13430 /* If there are more collisions, we can't handle it by reordering.
13431 Do an lea to the last part and use only one colliding move. */
13432 else if (collisions > 1)
13438 base = part[0][nparts - 1];
13440 /* Handle the case when the last part isn't valid for lea.
13441 Happens in 64-bit mode storing the 12-byte XFmode. */
13442 if (GET_MODE (base) != Pmode)
13443 base = gen_rtx_REG (Pmode, REGNO (base));
13445 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13446 part[1][0] = replace_equiv_address (part[1][0], base);
13447 part[1][1] = replace_equiv_address (part[1][1],
13448 plus_constant (base, UNITS_PER_WORD));
13450 part[1][2] = replace_equiv_address (part[1][2],
13451 plus_constant (base, 8));
13461 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13462 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13463 emit_move_insn (part[0][2], part[1][2]);
13468 /* In 64bit mode we don't have 32bit push available. In case this is
13469 register, it is OK - we will just use larger counterpart. We also
13470 retype memory - these comes from attempt to avoid REX prefix on
13471 moving of second half of TFmode value. */
13472 if (GET_MODE (part[1][1]) == SImode)
13474 switch (GET_CODE (part[1][1]))
13477 part[1][1] = adjust_address (part[1][1], DImode, 0);
13481 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13485 gcc_unreachable ();
13488 if (GET_MODE (part[1][0]) == SImode)
13489 part[1][0] = part[1][1];
13492 emit_move_insn (part[0][1], part[1][1]);
13493 emit_move_insn (part[0][0], part[1][0]);
13497 /* Choose correct order to not overwrite the source before it is copied. */
13498 if ((REG_P (part[0][0])
13499 && REG_P (part[1][1])
13500 && (REGNO (part[0][0]) == REGNO (part[1][1])
13502 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13504 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13508 operands[2] = part[0][2];
13509 operands[3] = part[0][1];
13510 operands[4] = part[0][0];
13511 operands[5] = part[1][2];
13512 operands[6] = part[1][1];
13513 operands[7] = part[1][0];
13517 operands[2] = part[0][1];
13518 operands[3] = part[0][0];
13519 operands[5] = part[1][1];
13520 operands[6] = part[1][0];
13527 operands[2] = part[0][0];
13528 operands[3] = part[0][1];
13529 operands[4] = part[0][2];
13530 operands[5] = part[1][0];
13531 operands[6] = part[1][1];
13532 operands[7] = part[1][2];
13536 operands[2] = part[0][0];
13537 operands[3] = part[0][1];
13538 operands[5] = part[1][0];
13539 operands[6] = part[1][1];
13543 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13546 if (CONST_INT_P (operands[5])
13547 && operands[5] != const0_rtx
13548 && REG_P (operands[2]))
13550 if (CONST_INT_P (operands[6])
13551 && INTVAL (operands[6]) == INTVAL (operands[5]))
13552 operands[6] = operands[2];
13555 && CONST_INT_P (operands[7])
13556 && INTVAL (operands[7]) == INTVAL (operands[5]))
13557 operands[7] = operands[2];
13561 && CONST_INT_P (operands[6])
13562 && operands[6] != const0_rtx
13563 && REG_P (operands[3])
13564 && CONST_INT_P (operands[7])
13565 && INTVAL (operands[7]) == INTVAL (operands[6]))
13566 operands[7] = operands[3];
13569 emit_move_insn (operands[2], operands[5]);
13570 emit_move_insn (operands[3], operands[6]);
13572 emit_move_insn (operands[4], operands[7]);
13577 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13578 left shift by a constant, either using a single shift or
13579 a sequence of add instructions. */
13582 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13586 emit_insn ((mode == DImode
13588 : gen_adddi3) (operand, operand, operand));
13590 else if (!optimize_size
13591 && count * ix86_cost->add <= ix86_cost->shift_const)
13594 for (i=0; i<count; i++)
13596 emit_insn ((mode == DImode
13598 : gen_adddi3) (operand, operand, operand));
13602 emit_insn ((mode == DImode
13604 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13608 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13610 rtx low[2], high[2];
13612 const int single_width = mode == DImode ? 32 : 64;
13614 if (CONST_INT_P (operands[2]))
13616 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13617 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13619 if (count >= single_width)
13621 emit_move_insn (high[0], low[1]);
13622 emit_move_insn (low[0], const0_rtx);
13624 if (count > single_width)
13625 ix86_expand_ashl_const (high[0], count - single_width, mode);
13629 if (!rtx_equal_p (operands[0], operands[1]))
13630 emit_move_insn (operands[0], operands[1]);
13631 emit_insn ((mode == DImode
13633 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13634 ix86_expand_ashl_const (low[0], count, mode);
13639 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13641 if (operands[1] == const1_rtx)
13643 /* Assuming we've chosen a QImode capable registers, then 1 << N
13644 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13645 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13647 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13649 ix86_expand_clear (low[0]);
13650 ix86_expand_clear (high[0]);
13651 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13653 d = gen_lowpart (QImode, low[0]);
13654 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13655 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13656 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13658 d = gen_lowpart (QImode, high[0]);
13659 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13660 s = gen_rtx_NE (QImode, flags, const0_rtx);
13661 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13664 /* Otherwise, we can get the same results by manually performing
13665 a bit extract operation on bit 5/6, and then performing the two
13666 shifts. The two methods of getting 0/1 into low/high are exactly
13667 the same size. Avoiding the shift in the bit extract case helps
13668 pentium4 a bit; no one else seems to care much either way. */
13673 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13674 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13676 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13677 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13679 emit_insn ((mode == DImode
13681 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13682 emit_insn ((mode == DImode
13684 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13685 emit_move_insn (low[0], high[0]);
13686 emit_insn ((mode == DImode
13688 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13691 emit_insn ((mode == DImode
13693 : gen_ashldi3) (low[0], low[0], operands[2]));
13694 emit_insn ((mode == DImode
13696 : gen_ashldi3) (high[0], high[0], operands[2]));
13700 if (operands[1] == constm1_rtx)
13702 /* For -1 << N, we can avoid the shld instruction, because we
13703 know that we're shifting 0...31/63 ones into a -1. */
13704 emit_move_insn (low[0], constm1_rtx);
13706 emit_move_insn (high[0], low[0]);
13708 emit_move_insn (high[0], constm1_rtx);
13712 if (!rtx_equal_p (operands[0], operands[1]))
13713 emit_move_insn (operands[0], operands[1]);
13715 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13716 emit_insn ((mode == DImode
13718 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13721 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13723 if (TARGET_CMOVE && scratch)
13725 ix86_expand_clear (scratch);
13726 emit_insn ((mode == DImode
13727 ? gen_x86_shift_adj_1
13728 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13731 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13735 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13737 rtx low[2], high[2];
13739 const int single_width = mode == DImode ? 32 : 64;
13741 if (CONST_INT_P (operands[2]))
13743 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13744 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13746 if (count == single_width * 2 - 1)
13748 emit_move_insn (high[0], high[1]);
13749 emit_insn ((mode == DImode
13751 : gen_ashrdi3) (high[0], high[0],
13752 GEN_INT (single_width - 1)));
13753 emit_move_insn (low[0], high[0]);
13756 else if (count >= single_width)
13758 emit_move_insn (low[0], high[1]);
13759 emit_move_insn (high[0], low[0]);
13760 emit_insn ((mode == DImode
13762 : gen_ashrdi3) (high[0], high[0],
13763 GEN_INT (single_width - 1)));
13764 if (count > single_width)
13765 emit_insn ((mode == DImode
13767 : gen_ashrdi3) (low[0], low[0],
13768 GEN_INT (count - single_width)));
13772 if (!rtx_equal_p (operands[0], operands[1]))
13773 emit_move_insn (operands[0], operands[1]);
13774 emit_insn ((mode == DImode
13776 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13777 emit_insn ((mode == DImode
13779 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13784 if (!rtx_equal_p (operands[0], operands[1]))
13785 emit_move_insn (operands[0], operands[1]);
13787 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13789 emit_insn ((mode == DImode
13791 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13792 emit_insn ((mode == DImode
13794 : gen_ashrdi3) (high[0], high[0], operands[2]));
13796 if (TARGET_CMOVE && scratch)
13798 emit_move_insn (scratch, high[0]);
13799 emit_insn ((mode == DImode
13801 : gen_ashrdi3) (scratch, scratch,
13802 GEN_INT (single_width - 1)));
13803 emit_insn ((mode == DImode
13804 ? gen_x86_shift_adj_1
13805 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13809 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13814 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13816 rtx low[2], high[2];
13818 const int single_width = mode == DImode ? 32 : 64;
13820 if (CONST_INT_P (operands[2]))
13822 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13823 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13825 if (count >= single_width)
13827 emit_move_insn (low[0], high[1]);
13828 ix86_expand_clear (high[0]);
13830 if (count > single_width)
13831 emit_insn ((mode == DImode
13833 : gen_lshrdi3) (low[0], low[0],
13834 GEN_INT (count - single_width)));
13838 if (!rtx_equal_p (operands[0], operands[1]))
13839 emit_move_insn (operands[0], operands[1]);
13840 emit_insn ((mode == DImode
13842 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13843 emit_insn ((mode == DImode
13845 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13850 if (!rtx_equal_p (operands[0], operands[1]))
13851 emit_move_insn (operands[0], operands[1]);
13853 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13855 emit_insn ((mode == DImode
13857 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13858 emit_insn ((mode == DImode
13860 : gen_lshrdi3) (high[0], high[0], operands[2]));
13862 /* Heh. By reversing the arguments, we can reuse this pattern. */
13863 if (TARGET_CMOVE && scratch)
13865 ix86_expand_clear (scratch);
13866 emit_insn ((mode == DImode
13867 ? gen_x86_shift_adj_1
13868 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13872 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13876 /* Predict just emitted jump instruction to be taken with probability PROB. */
13878 predict_jump (int prob)
13880 rtx insn = get_last_insn ();
13881 gcc_assert (JUMP_P (insn));
13883 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13888 /* Helper function for the string operations below. Dest VARIABLE whether
13889 it is aligned to VALUE bytes. If true, jump to the label. */
13891 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13893 rtx label = gen_label_rtx ();
13894 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13895 if (GET_MODE (variable) == DImode)
13896 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13898 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13899 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13902 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13904 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13908 /* Adjust COUNTER by the VALUE. */
13910 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13912 if (GET_MODE (countreg) == DImode)
13913 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13915 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13918 /* Zero extend possibly SImode EXP to Pmode register. */
13920 ix86_zero_extend_to_Pmode (rtx exp)
13923 if (GET_MODE (exp) == VOIDmode)
13924 return force_reg (Pmode, exp);
13925 if (GET_MODE (exp) == Pmode)
13926 return copy_to_mode_reg (Pmode, exp);
13927 r = gen_reg_rtx (Pmode);
13928 emit_insn (gen_zero_extendsidi2 (r, exp));
13932 /* Divide COUNTREG by SCALE. */
13934 scale_counter (rtx countreg, int scale)
13937 rtx piece_size_mask;
13941 if (CONST_INT_P (countreg))
13942 return GEN_INT (INTVAL (countreg) / scale);
13943 gcc_assert (REG_P (countreg));
13945 piece_size_mask = GEN_INT (scale - 1);
13946 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13947 GEN_INT (exact_log2 (scale)),
13948 NULL, 1, OPTAB_DIRECT);
13952 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13953 DImode for constant loop counts. */
13955 static enum machine_mode
13956 counter_mode (rtx count_exp)
13958 if (GET_MODE (count_exp) != VOIDmode)
13959 return GET_MODE (count_exp);
13960 if (GET_CODE (count_exp) != CONST_INT)
13962 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13967 /* When SRCPTR is non-NULL, output simple loop to move memory
13968 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13969 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13970 equivalent loop to set memory by VALUE (supposed to be in MODE).
13972 The size is rounded down to whole number of chunk size moved at once.
13973 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13977 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13978 rtx destptr, rtx srcptr, rtx value,
13979 rtx count, enum machine_mode mode, int unroll,
13982 rtx out_label, top_label, iter, tmp;
13983 enum machine_mode iter_mode = counter_mode (count);
13984 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13985 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13991 top_label = gen_label_rtx ();
13992 out_label = gen_label_rtx ();
13993 iter = gen_reg_rtx (iter_mode);
13995 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13996 NULL, 1, OPTAB_DIRECT);
13997 /* Those two should combine. */
13998 if (piece_size == const1_rtx)
14000 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14002 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14004 emit_move_insn (iter, const0_rtx);
14006 emit_label (top_label);
14008 tmp = convert_modes (Pmode, iter_mode, iter, true);
14009 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14010 destmem = change_address (destmem, mode, x_addr);
14014 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14015 srcmem = change_address (srcmem, mode, y_addr);
14017 /* When unrolling for chips that reorder memory reads and writes,
14018 we can save registers by using single temporary.
14019 Also using 4 temporaries is overkill in 32bit mode. */
14020 if (!TARGET_64BIT && 0)
14022 for (i = 0; i < unroll; i++)
14027 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14029 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14031 emit_move_insn (destmem, srcmem);
14037 gcc_assert (unroll <= 4);
14038 for (i = 0; i < unroll; i++)
14040 tmpreg[i] = gen_reg_rtx (mode);
14044 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14046 emit_move_insn (tmpreg[i], srcmem);
14048 for (i = 0; i < unroll; i++)
14053 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14055 emit_move_insn (destmem, tmpreg[i]);
14060 for (i = 0; i < unroll; i++)
14064 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14065 emit_move_insn (destmem, value);
14068 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14069 true, OPTAB_LIB_WIDEN);
14071 emit_move_insn (iter, tmp);
14073 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14075 if (expected_size != -1)
14077 expected_size /= GET_MODE_SIZE (mode) * unroll;
14078 if (expected_size == 0)
14080 else if (expected_size > REG_BR_PROB_BASE)
14081 predict_jump (REG_BR_PROB_BASE - 1);
14083 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14086 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14087 iter = ix86_zero_extend_to_Pmode (iter);
14088 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14089 true, OPTAB_LIB_WIDEN);
14090 if (tmp != destptr)
14091 emit_move_insn (destptr, tmp);
14094 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14095 true, OPTAB_LIB_WIDEN);
14097 emit_move_insn (srcptr, tmp);
14099 emit_label (out_label);
14102 /* Output "rep; mov" instruction.
14103 Arguments have same meaning as for previous function */
14105 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14106 rtx destptr, rtx srcptr,
14108 enum machine_mode mode)
14114 /* If the size is known, it is shorter to use rep movs. */
14115 if (mode == QImode && CONST_INT_P (count)
14116 && !(INTVAL (count) & 3))
14119 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14120 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14121 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14122 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14123 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14124 if (mode != QImode)
14126 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14127 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14128 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14129 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14130 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14131 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14135 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14136 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14138 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14142 /* Output "rep; stos" instruction.
14143 Arguments have same meaning as for previous function */
14145 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14147 enum machine_mode mode)
14152 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14153 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14154 value = force_reg (mode, gen_lowpart (mode, value));
14155 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14156 if (mode != QImode)
14158 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14159 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14160 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14163 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14164 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14168 emit_strmov (rtx destmem, rtx srcmem,
14169 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14171 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14172 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14173 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14176 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14178 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14179 rtx destptr, rtx srcptr, rtx count, int max_size)
14182 if (CONST_INT_P (count))
14184 HOST_WIDE_INT countval = INTVAL (count);
14187 if ((countval & 0x10) && max_size > 16)
14191 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14192 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14195 gcc_unreachable ();
14198 if ((countval & 0x08) && max_size > 8)
14201 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14204 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14205 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14209 if ((countval & 0x04) && max_size > 4)
14211 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14214 if ((countval & 0x02) && max_size > 2)
14216 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14219 if ((countval & 0x01) && max_size > 1)
14221 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14228 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14229 count, 1, OPTAB_DIRECT);
14230 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14231 count, QImode, 1, 4);
14235 /* When there are stringops, we can cheaply increase dest and src pointers.
14236 Otherwise we save code size by maintaining offset (zero is readily
14237 available from preceding rep operation) and using x86 addressing modes.
14239 if (TARGET_SINGLE_STRINGOP)
14243 rtx label = ix86_expand_aligntest (count, 4, true);
14244 src = change_address (srcmem, SImode, srcptr);
14245 dest = change_address (destmem, SImode, destptr);
14246 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14247 emit_label (label);
14248 LABEL_NUSES (label) = 1;
14252 rtx label = ix86_expand_aligntest (count, 2, true);
14253 src = change_address (srcmem, HImode, srcptr);
14254 dest = change_address (destmem, HImode, destptr);
14255 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14256 emit_label (label);
14257 LABEL_NUSES (label) = 1;
14261 rtx label = ix86_expand_aligntest (count, 1, true);
14262 src = change_address (srcmem, QImode, srcptr);
14263 dest = change_address (destmem, QImode, destptr);
14264 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14265 emit_label (label);
14266 LABEL_NUSES (label) = 1;
14271 rtx offset = force_reg (Pmode, const0_rtx);
14276 rtx label = ix86_expand_aligntest (count, 4, true);
14277 src = change_address (srcmem, SImode, srcptr);
14278 dest = change_address (destmem, SImode, destptr);
14279 emit_move_insn (dest, src);
14280 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14281 true, OPTAB_LIB_WIDEN);
14283 emit_move_insn (offset, tmp);
14284 emit_label (label);
14285 LABEL_NUSES (label) = 1;
14289 rtx label = ix86_expand_aligntest (count, 2, true);
14290 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14291 src = change_address (srcmem, HImode, tmp);
14292 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14293 dest = change_address (destmem, HImode, tmp);
14294 emit_move_insn (dest, src);
14295 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14296 true, OPTAB_LIB_WIDEN);
14298 emit_move_insn (offset, tmp);
14299 emit_label (label);
14300 LABEL_NUSES (label) = 1;
14304 rtx label = ix86_expand_aligntest (count, 1, true);
14305 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14306 src = change_address (srcmem, QImode, tmp);
14307 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14308 dest = change_address (destmem, QImode, tmp);
14309 emit_move_insn (dest, src);
14310 emit_label (label);
14311 LABEL_NUSES (label) = 1;
14316 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14318 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14319 rtx count, int max_size)
14322 expand_simple_binop (counter_mode (count), AND, count,
14323 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14324 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14325 gen_lowpart (QImode, value), count, QImode,
14329 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14331 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14335 if (CONST_INT_P (count))
14337 HOST_WIDE_INT countval = INTVAL (count);
14340 if ((countval & 0x10) && max_size > 16)
14344 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14345 emit_insn (gen_strset (destptr, dest, value));
14346 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14347 emit_insn (gen_strset (destptr, dest, value));
14350 gcc_unreachable ();
14353 if ((countval & 0x08) && max_size > 8)
14357 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14358 emit_insn (gen_strset (destptr, dest, value));
14362 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14363 emit_insn (gen_strset (destptr, dest, value));
14364 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14365 emit_insn (gen_strset (destptr, dest, value));
14369 if ((countval & 0x04) && max_size > 4)
14371 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14372 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14375 if ((countval & 0x02) && max_size > 2)
14377 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14378 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14381 if ((countval & 0x01) && max_size > 1)
14383 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14384 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14391 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14396 rtx label = ix86_expand_aligntest (count, 16, true);
14399 dest = change_address (destmem, DImode, destptr);
14400 emit_insn (gen_strset (destptr, dest, value));
14401 emit_insn (gen_strset (destptr, dest, value));
14405 dest = change_address (destmem, SImode, destptr);
14406 emit_insn (gen_strset (destptr, dest, value));
14407 emit_insn (gen_strset (destptr, dest, value));
14408 emit_insn (gen_strset (destptr, dest, value));
14409 emit_insn (gen_strset (destptr, dest, value));
14411 emit_label (label);
14412 LABEL_NUSES (label) = 1;
14416 rtx label = ix86_expand_aligntest (count, 8, true);
14419 dest = change_address (destmem, DImode, destptr);
14420 emit_insn (gen_strset (destptr, dest, value));
14424 dest = change_address (destmem, SImode, destptr);
14425 emit_insn (gen_strset (destptr, dest, value));
14426 emit_insn (gen_strset (destptr, dest, value));
14428 emit_label (label);
14429 LABEL_NUSES (label) = 1;
14433 rtx label = ix86_expand_aligntest (count, 4, true);
14434 dest = change_address (destmem, SImode, destptr);
14435 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14436 emit_label (label);
14437 LABEL_NUSES (label) = 1;
14441 rtx label = ix86_expand_aligntest (count, 2, true);
14442 dest = change_address (destmem, HImode, destptr);
14443 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14444 emit_label (label);
14445 LABEL_NUSES (label) = 1;
14449 rtx label = ix86_expand_aligntest (count, 1, true);
14450 dest = change_address (destmem, QImode, destptr);
14451 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14452 emit_label (label);
14453 LABEL_NUSES (label) = 1;
14457 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14458 DESIRED_ALIGNMENT. */
14460 expand_movmem_prologue (rtx destmem, rtx srcmem,
14461 rtx destptr, rtx srcptr, rtx count,
14462 int align, int desired_alignment)
14464 if (align <= 1 && desired_alignment > 1)
14466 rtx label = ix86_expand_aligntest (destptr, 1, false);
14467 srcmem = change_address (srcmem, QImode, srcptr);
14468 destmem = change_address (destmem, QImode, destptr);
14469 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14470 ix86_adjust_counter (count, 1);
14471 emit_label (label);
14472 LABEL_NUSES (label) = 1;
14474 if (align <= 2 && desired_alignment > 2)
14476 rtx label = ix86_expand_aligntest (destptr, 2, false);
14477 srcmem = change_address (srcmem, HImode, srcptr);
14478 destmem = change_address (destmem, HImode, destptr);
14479 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14480 ix86_adjust_counter (count, 2);
14481 emit_label (label);
14482 LABEL_NUSES (label) = 1;
14484 if (align <= 4 && desired_alignment > 4)
14486 rtx label = ix86_expand_aligntest (destptr, 4, false);
14487 srcmem = change_address (srcmem, SImode, srcptr);
14488 destmem = change_address (destmem, SImode, destptr);
14489 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14490 ix86_adjust_counter (count, 4);
14491 emit_label (label);
14492 LABEL_NUSES (label) = 1;
14494 gcc_assert (desired_alignment <= 8);
14497 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14498 DESIRED_ALIGNMENT. */
14500 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14501 int align, int desired_alignment)
14503 if (align <= 1 && desired_alignment > 1)
14505 rtx label = ix86_expand_aligntest (destptr, 1, false);
14506 destmem = change_address (destmem, QImode, destptr);
14507 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14508 ix86_adjust_counter (count, 1);
14509 emit_label (label);
14510 LABEL_NUSES (label) = 1;
14512 if (align <= 2 && desired_alignment > 2)
14514 rtx label = ix86_expand_aligntest (destptr, 2, false);
14515 destmem = change_address (destmem, HImode, destptr);
14516 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14517 ix86_adjust_counter (count, 2);
14518 emit_label (label);
14519 LABEL_NUSES (label) = 1;
14521 if (align <= 4 && desired_alignment > 4)
14523 rtx label = ix86_expand_aligntest (destptr, 4, false);
14524 destmem = change_address (destmem, SImode, destptr);
14525 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14526 ix86_adjust_counter (count, 4);
14527 emit_label (label);
14528 LABEL_NUSES (label) = 1;
14530 gcc_assert (desired_alignment <= 8);
14533 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14534 static enum stringop_alg
14535 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14536 int *dynamic_check)
14538 const struct stringop_algs * algs;
14540 *dynamic_check = -1;
14542 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14544 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14545 if (stringop_alg != no_stringop)
14546 return stringop_alg;
14547 /* rep; movq or rep; movl is the smallest variant. */
14548 else if (optimize_size)
14550 if (!count || (count & 3))
14551 return rep_prefix_1_byte;
14553 return rep_prefix_4_byte;
14555 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14557 else if (expected_size != -1 && expected_size < 4)
14558 return loop_1_byte;
14559 else if (expected_size != -1)
14562 enum stringop_alg alg = libcall;
14563 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14565 gcc_assert (algs->size[i].max);
14566 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14568 if (algs->size[i].alg != libcall)
14569 alg = algs->size[i].alg;
14570 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14571 last non-libcall inline algorithm. */
14572 if (TARGET_INLINE_ALL_STRINGOPS)
14574 /* When the current size is best to be copied by a libcall,
14575 but we are still forced to inline, run the heuristic bellow
14576 that will pick code for medium sized blocks. */
14577 if (alg != libcall)
14582 return algs->size[i].alg;
14585 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14587 /* When asked to inline the call anyway, try to pick meaningful choice.
14588 We look for maximal size of block that is faster to copy by hand and
14589 take blocks of at most of that size guessing that average size will
14590 be roughly half of the block.
14592 If this turns out to be bad, we might simply specify the preferred
14593 choice in ix86_costs. */
14594 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14595 && algs->unknown_size == libcall)
14598 enum stringop_alg alg;
14601 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14602 if (algs->size[i].alg != libcall && algs->size[i].alg)
14603 max = algs->size[i].max;
14606 alg = decide_alg (count, max / 2, memset, dynamic_check);
14607 gcc_assert (*dynamic_check == -1);
14608 gcc_assert (alg != libcall);
14609 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14610 *dynamic_check = max;
14613 return algs->unknown_size;
14616 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14617 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14619 decide_alignment (int align,
14620 enum stringop_alg alg,
14623 int desired_align = 0;
14627 gcc_unreachable ();
14629 case unrolled_loop:
14630 desired_align = GET_MODE_SIZE (Pmode);
14632 case rep_prefix_8_byte:
14635 case rep_prefix_4_byte:
14636 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14637 copying whole cacheline at once. */
14638 if (TARGET_PENTIUMPRO)
14643 case rep_prefix_1_byte:
14644 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14645 copying whole cacheline at once. */
14646 if (TARGET_PENTIUMPRO)
14660 if (desired_align < align)
14661 desired_align = align;
14662 if (expected_size != -1 && expected_size < 4)
14663 desired_align = align;
14664 return desired_align;
14667 /* Return the smallest power of 2 greater than VAL. */
14669 smallest_pow2_greater_than (int val)
14677 /* Expand string move (memcpy) operation. Use i386 string operations when
14678 profitable. expand_clrmem contains similar code. The code depends upon
14679 architecture, block size and alignment, but always has the same
14682 1) Prologue guard: Conditional that jumps up to epilogues for small
14683 blocks that can be handled by epilogue alone. This is faster but
14684 also needed for correctness, since prologue assume the block is larger
14685 than the desired alignment.
14687 Optional dynamic check for size and libcall for large
14688 blocks is emitted here too, with -minline-stringops-dynamically.
14690 2) Prologue: copy first few bytes in order to get destination aligned
14691 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14692 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14693 We emit either a jump tree on power of two sized blocks, or a byte loop.
14695 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14696 with specified algorithm.
14698 4) Epilogue: code copying tail of the block that is too small to be
14699 handled by main body (or up to size guarded by prologue guard). */
14702 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14703 rtx expected_align_exp, rtx expected_size_exp)
14709 rtx jump_around_label = NULL;
14710 HOST_WIDE_INT align = 1;
14711 unsigned HOST_WIDE_INT count = 0;
14712 HOST_WIDE_INT expected_size = -1;
14713 int size_needed = 0, epilogue_size_needed;
14714 int desired_align = 0;
14715 enum stringop_alg alg;
14718 if (CONST_INT_P (align_exp))
14719 align = INTVAL (align_exp);
14720 /* i386 can do misaligned access on reasonably increased cost. */
14721 if (CONST_INT_P (expected_align_exp)
14722 && INTVAL (expected_align_exp) > align)
14723 align = INTVAL (expected_align_exp);
14724 if (CONST_INT_P (count_exp))
14725 count = expected_size = INTVAL (count_exp);
14726 if (CONST_INT_P (expected_size_exp) && count == 0)
14727 expected_size = INTVAL (expected_size_exp);
14729 /* Step 0: Decide on preferred algorithm, desired alignment and
14730 size of chunks to be copied by main loop. */
14732 alg = decide_alg (count, expected_size, false, &dynamic_check);
14733 desired_align = decide_alignment (align, alg, expected_size);
14735 if (!TARGET_ALIGN_STRINGOPS)
14736 align = desired_align;
14738 if (alg == libcall)
14740 gcc_assert (alg != no_stringop);
14742 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14743 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14744 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14749 gcc_unreachable ();
14751 size_needed = GET_MODE_SIZE (Pmode);
14753 case unrolled_loop:
14754 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14756 case rep_prefix_8_byte:
14759 case rep_prefix_4_byte:
14762 case rep_prefix_1_byte:
14768 epilogue_size_needed = size_needed;
14770 /* Step 1: Prologue guard. */
14772 /* Alignment code needs count to be in register. */
14773 if (CONST_INT_P (count_exp) && desired_align > align)
14775 enum machine_mode mode = SImode;
14776 if (TARGET_64BIT && (count & ~0xffffffff))
14778 count_exp = force_reg (mode, count_exp);
14780 gcc_assert (desired_align >= 1 && align >= 1);
14782 /* Ensure that alignment prologue won't copy past end of block. */
14783 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14785 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14786 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14787 Make sure it is power of 2. */
14788 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14790 label = gen_label_rtx ();
14791 emit_cmp_and_jump_insns (count_exp,
14792 GEN_INT (epilogue_size_needed),
14793 LTU, 0, counter_mode (count_exp), 1, label);
14794 if (GET_CODE (count_exp) == CONST_INT)
14796 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14797 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14799 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14801 /* Emit code to decide on runtime whether library call or inline should be
14803 if (dynamic_check != -1)
14805 rtx hot_label = gen_label_rtx ();
14806 jump_around_label = gen_label_rtx ();
14807 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14808 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14809 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14810 emit_block_move_via_libcall (dst, src, count_exp, false);
14811 emit_jump (jump_around_label);
14812 emit_label (hot_label);
14815 /* Step 2: Alignment prologue. */
14817 if (desired_align > align)
14819 /* Except for the first move in epilogue, we no longer know
14820 constant offset in aliasing info. It don't seems to worth
14821 the pain to maintain it for the first move, so throw away
14823 src = change_address (src, BLKmode, srcreg);
14824 dst = change_address (dst, BLKmode, destreg);
14825 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14828 if (label && size_needed == 1)
14830 emit_label (label);
14831 LABEL_NUSES (label) = 1;
14835 /* Step 3: Main loop. */
14841 gcc_unreachable ();
14843 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14844 count_exp, QImode, 1, expected_size);
14847 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14848 count_exp, Pmode, 1, expected_size);
14850 case unrolled_loop:
14851 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14852 registers for 4 temporaries anyway. */
14853 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14854 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14857 case rep_prefix_8_byte:
14858 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14861 case rep_prefix_4_byte:
14862 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14865 case rep_prefix_1_byte:
14866 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14870 /* Adjust properly the offset of src and dest memory for aliasing. */
14871 if (CONST_INT_P (count_exp))
14873 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14874 (count / size_needed) * size_needed);
14875 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14876 (count / size_needed) * size_needed);
14880 src = change_address (src, BLKmode, srcreg);
14881 dst = change_address (dst, BLKmode, destreg);
14884 /* Step 4: Epilogue to copy the remaining bytes. */
14888 /* When the main loop is done, COUNT_EXP might hold original count,
14889 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14890 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14891 bytes. Compensate if needed. */
14893 if (size_needed < epilogue_size_needed)
14896 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14897 GEN_INT (size_needed - 1), count_exp, 1,
14899 if (tmp != count_exp)
14900 emit_move_insn (count_exp, tmp);
14902 emit_label (label);
14903 LABEL_NUSES (label) = 1;
14906 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14907 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14908 epilogue_size_needed);
14909 if (jump_around_label)
14910 emit_label (jump_around_label);
14914 /* Helper function for memcpy. For QImode value 0xXY produce
14915 0xXYXYXYXY of wide specified by MODE. This is essentially
14916 a * 0x10101010, but we can do slightly better than
14917 synth_mult by unwinding the sequence by hand on CPUs with
14920 promote_duplicated_reg (enum machine_mode mode, rtx val)
14922 enum machine_mode valmode = GET_MODE (val);
14924 int nops = mode == DImode ? 3 : 2;
14926 gcc_assert (mode == SImode || mode == DImode);
14927 if (val == const0_rtx)
14928 return copy_to_mode_reg (mode, const0_rtx);
14929 if (CONST_INT_P (val))
14931 HOST_WIDE_INT v = INTVAL (val) & 255;
14935 if (mode == DImode)
14936 v |= (v << 16) << 16;
14937 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14940 if (valmode == VOIDmode)
14942 if (valmode != QImode)
14943 val = gen_lowpart (QImode, val);
14944 if (mode == QImode)
14946 if (!TARGET_PARTIAL_REG_STALL)
14948 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14949 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14950 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14951 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14953 rtx reg = convert_modes (mode, QImode, val, true);
14954 tmp = promote_duplicated_reg (mode, const1_rtx);
14955 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14960 rtx reg = convert_modes (mode, QImode, val, true);
14962 if (!TARGET_PARTIAL_REG_STALL)
14963 if (mode == SImode)
14964 emit_insn (gen_movsi_insv_1 (reg, reg));
14966 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14969 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14970 NULL, 1, OPTAB_DIRECT);
14972 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14974 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14975 NULL, 1, OPTAB_DIRECT);
14976 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14977 if (mode == SImode)
14979 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14980 NULL, 1, OPTAB_DIRECT);
14981 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14986 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14987 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14988 alignment from ALIGN to DESIRED_ALIGN. */
14990 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14995 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14996 promoted_val = promote_duplicated_reg (DImode, val);
14997 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14998 promoted_val = promote_duplicated_reg (SImode, val);
14999 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15000 promoted_val = promote_duplicated_reg (HImode, val);
15002 promoted_val = val;
15004 return promoted_val;
15007 /* Expand string clear operation (bzero). Use i386 string operations when
15008 profitable. See expand_movmem comment for explanation of individual
15009 steps performed. */
15011 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15012 rtx expected_align_exp, rtx expected_size_exp)
15017 rtx jump_around_label = NULL;
15018 HOST_WIDE_INT align = 1;
15019 unsigned HOST_WIDE_INT count = 0;
15020 HOST_WIDE_INT expected_size = -1;
15021 int size_needed = 0, epilogue_size_needed;
15022 int desired_align = 0;
15023 enum stringop_alg alg;
15024 rtx promoted_val = NULL;
15025 bool force_loopy_epilogue = false;
15028 if (CONST_INT_P (align_exp))
15029 align = INTVAL (align_exp);
15030 /* i386 can do misaligned access on reasonably increased cost. */
15031 if (CONST_INT_P (expected_align_exp)
15032 && INTVAL (expected_align_exp) > align)
15033 align = INTVAL (expected_align_exp);
15034 if (CONST_INT_P (count_exp))
15035 count = expected_size = INTVAL (count_exp);
15036 if (CONST_INT_P (expected_size_exp) && count == 0)
15037 expected_size = INTVAL (expected_size_exp);
15039 /* Step 0: Decide on preferred algorithm, desired alignment and
15040 size of chunks to be copied by main loop. */
15042 alg = decide_alg (count, expected_size, true, &dynamic_check);
15043 desired_align = decide_alignment (align, alg, expected_size);
15045 if (!TARGET_ALIGN_STRINGOPS)
15046 align = desired_align;
15048 if (alg == libcall)
15050 gcc_assert (alg != no_stringop);
15052 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15053 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15058 gcc_unreachable ();
15060 size_needed = GET_MODE_SIZE (Pmode);
15062 case unrolled_loop:
15063 size_needed = GET_MODE_SIZE (Pmode) * 4;
15065 case rep_prefix_8_byte:
15068 case rep_prefix_4_byte:
15071 case rep_prefix_1_byte:
15076 epilogue_size_needed = size_needed;
15078 /* Step 1: Prologue guard. */
15080 /* Alignment code needs count to be in register. */
15081 if (CONST_INT_P (count_exp) && desired_align > align)
15083 enum machine_mode mode = SImode;
15084 if (TARGET_64BIT && (count & ~0xffffffff))
15086 count_exp = force_reg (mode, count_exp);
15088 /* Do the cheap promotion to allow better CSE across the
15089 main loop and epilogue (ie one load of the big constant in the
15090 front of all code. */
15091 if (CONST_INT_P (val_exp))
15092 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15093 desired_align, align);
15094 /* Ensure that alignment prologue won't copy past end of block. */
15095 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15097 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15098 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15099 Make sure it is power of 2. */
15100 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15102 /* To improve performance of small blocks, we jump around the VAL
15103 promoting mode. This mean that if the promoted VAL is not constant,
15104 we might not use it in the epilogue and have to use byte
15106 if (epilogue_size_needed > 2 && !promoted_val)
15107 force_loopy_epilogue = true;
15108 label = gen_label_rtx ();
15109 emit_cmp_and_jump_insns (count_exp,
15110 GEN_INT (epilogue_size_needed),
15111 LTU, 0, counter_mode (count_exp), 1, label);
15112 if (GET_CODE (count_exp) == CONST_INT)
15114 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15115 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15117 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15119 if (dynamic_check != -1)
15121 rtx hot_label = gen_label_rtx ();
15122 jump_around_label = gen_label_rtx ();
15123 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15124 LEU, 0, counter_mode (count_exp), 1, hot_label);
15125 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15126 set_storage_via_libcall (dst, count_exp, val_exp, false);
15127 emit_jump (jump_around_label);
15128 emit_label (hot_label);
15131 /* Step 2: Alignment prologue. */
15133 /* Do the expensive promotion once we branched off the small blocks. */
15135 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15136 desired_align, align);
15137 gcc_assert (desired_align >= 1 && align >= 1);
15139 if (desired_align > align)
15141 /* Except for the first move in epilogue, we no longer know
15142 constant offset in aliasing info. It don't seems to worth
15143 the pain to maintain it for the first move, so throw away
15145 dst = change_address (dst, BLKmode, destreg);
15146 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15149 if (label && size_needed == 1)
15151 emit_label (label);
15152 LABEL_NUSES (label) = 1;
15156 /* Step 3: Main loop. */
15162 gcc_unreachable ();
15164 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15165 count_exp, QImode, 1, expected_size);
15168 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15169 count_exp, Pmode, 1, expected_size);
15171 case unrolled_loop:
15172 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15173 count_exp, Pmode, 4, expected_size);
15175 case rep_prefix_8_byte:
15176 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15179 case rep_prefix_4_byte:
15180 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15183 case rep_prefix_1_byte:
15184 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15188 /* Adjust properly the offset of src and dest memory for aliasing. */
15189 if (CONST_INT_P (count_exp))
15190 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15191 (count / size_needed) * size_needed);
15193 dst = change_address (dst, BLKmode, destreg);
15195 /* Step 4: Epilogue to copy the remaining bytes. */
15199 /* When the main loop is done, COUNT_EXP might hold original count,
15200 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15201 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15202 bytes. Compensate if needed. */
15204 if (size_needed < desired_align - align)
15207 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15208 GEN_INT (size_needed - 1), count_exp, 1,
15210 size_needed = desired_align - align + 1;
15211 if (tmp != count_exp)
15212 emit_move_insn (count_exp, tmp);
15214 emit_label (label);
15215 LABEL_NUSES (label) = 1;
15217 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15219 if (force_loopy_epilogue)
15220 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15223 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15226 if (jump_around_label)
15227 emit_label (jump_around_label);
15231 /* Expand the appropriate insns for doing strlen if not just doing
15234 out = result, initialized with the start address
15235 align_rtx = alignment of the address.
15236 scratch = scratch register, initialized with the startaddress when
15237 not aligned, otherwise undefined
15239 This is just the body. It needs the initializations mentioned above and
15240 some address computing at the end. These things are done in i386.md. */
15243 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15247 rtx align_2_label = NULL_RTX;
15248 rtx align_3_label = NULL_RTX;
15249 rtx align_4_label = gen_label_rtx ();
15250 rtx end_0_label = gen_label_rtx ();
15252 rtx tmpreg = gen_reg_rtx (SImode);
15253 rtx scratch = gen_reg_rtx (SImode);
15257 if (CONST_INT_P (align_rtx))
15258 align = INTVAL (align_rtx);
15260 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15262 /* Is there a known alignment and is it less than 4? */
15265 rtx scratch1 = gen_reg_rtx (Pmode);
15266 emit_move_insn (scratch1, out);
15267 /* Is there a known alignment and is it not 2? */
15270 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15271 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15273 /* Leave just the 3 lower bits. */
15274 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15275 NULL_RTX, 0, OPTAB_WIDEN);
15277 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15278 Pmode, 1, align_4_label);
15279 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15280 Pmode, 1, align_2_label);
15281 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15282 Pmode, 1, align_3_label);
15286 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15287 check if is aligned to 4 - byte. */
15289 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15290 NULL_RTX, 0, OPTAB_WIDEN);
15292 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15293 Pmode, 1, align_4_label);
15296 mem = change_address (src, QImode, out);
15298 /* Now compare the bytes. */
15300 /* Compare the first n unaligned byte on a byte per byte basis. */
15301 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15302 QImode, 1, end_0_label);
15304 /* Increment the address. */
15306 emit_insn (gen_adddi3 (out, out, const1_rtx));
15308 emit_insn (gen_addsi3 (out, out, const1_rtx));
15310 /* Not needed with an alignment of 2 */
15313 emit_label (align_2_label);
15315 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15319 emit_insn (gen_adddi3 (out, out, const1_rtx));
15321 emit_insn (gen_addsi3 (out, out, const1_rtx));
15323 emit_label (align_3_label);
15326 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15330 emit_insn (gen_adddi3 (out, out, const1_rtx));
15332 emit_insn (gen_addsi3 (out, out, const1_rtx));
15335 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15336 align this loop. It gives only huge programs, but does not help to
15338 emit_label (align_4_label);
15340 mem = change_address (src, SImode, out);
15341 emit_move_insn (scratch, mem);
15343 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15345 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15347 /* This formula yields a nonzero result iff one of the bytes is zero.
15348 This saves three branches inside loop and many cycles. */
15350 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15351 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15352 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15353 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15354 gen_int_mode (0x80808080, SImode)));
15355 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15360 rtx reg = gen_reg_rtx (SImode);
15361 rtx reg2 = gen_reg_rtx (Pmode);
15362 emit_move_insn (reg, tmpreg);
15363 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15365 /* If zero is not in the first two bytes, move two bytes forward. */
15366 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15367 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15368 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15369 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15370 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15373 /* Emit lea manually to avoid clobbering of flags. */
15374 emit_insn (gen_rtx_SET (SImode, reg2,
15375 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15377 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15378 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15379 emit_insn (gen_rtx_SET (VOIDmode, out,
15380 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15387 rtx end_2_label = gen_label_rtx ();
15388 /* Is zero in the first two bytes? */
15390 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15391 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15392 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15393 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15394 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15396 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15397 JUMP_LABEL (tmp) = end_2_label;
15399 /* Not in the first two. Move two bytes forward. */
15400 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15402 emit_insn (gen_adddi3 (out, out, const2_rtx));
15404 emit_insn (gen_addsi3 (out, out, const2_rtx));
15406 emit_label (end_2_label);
15410 /* Avoid branch in fixing the byte. */
15411 tmpreg = gen_lowpart (QImode, tmpreg);
15412 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15413 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15415 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15417 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15419 emit_label (end_0_label);
15422 /* Expand strlen. */
15425 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15427 rtx addr, scratch1, scratch2, scratch3, scratch4;
15429 /* The generic case of strlen expander is long. Avoid it's
15430 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
15432 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15433 && !TARGET_INLINE_ALL_STRINGOPS
15435 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15438 addr = force_reg (Pmode, XEXP (src, 0));
15439 scratch1 = gen_reg_rtx (Pmode);
15441 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15444 /* Well it seems that some optimizer does not combine a call like
15445 foo(strlen(bar), strlen(bar));
15446 when the move and the subtraction is done here. It does calculate
15447 the length just once when these instructions are done inside of
15448 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15449 often used and I use one fewer register for the lifetime of
15450 output_strlen_unroll() this is better. */
15452 emit_move_insn (out, addr);
15454 ix86_expand_strlensi_unroll_1 (out, src, align);
15456 /* strlensi_unroll_1 returns the address of the zero at the end of
15457 the string, like memchr(), so compute the length by subtracting
15458 the start address. */
15460 emit_insn (gen_subdi3 (out, out, addr));
15462 emit_insn (gen_subsi3 (out, out, addr));
15467 scratch2 = gen_reg_rtx (Pmode);
15468 scratch3 = gen_reg_rtx (Pmode);
15469 scratch4 = force_reg (Pmode, constm1_rtx);
15471 emit_move_insn (scratch3, addr);
15472 eoschar = force_reg (QImode, eoschar);
15474 src = replace_equiv_address_nv (src, scratch3);
15476 /* If .md starts supporting :P, this can be done in .md. */
15477 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15478 scratch4), UNSPEC_SCAS);
15479 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15482 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15483 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15487 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15488 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15494 /* For given symbol (function) construct code to compute address of it's PLT
15495 entry in large x86-64 PIC model. */
15497 construct_plt_address (rtx symbol)
15499 rtx tmp = gen_reg_rtx (Pmode);
15500 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15502 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15503 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15505 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15506 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15511 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15512 rtx callarg2 ATTRIBUTE_UNUSED,
15513 rtx pop, int sibcall)
15515 rtx use = NULL, call;
15517 if (pop == const0_rtx)
15519 gcc_assert (!TARGET_64BIT || !pop);
15521 if (TARGET_MACHO && !TARGET_64BIT)
15524 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15525 fnaddr = machopic_indirect_call_target (fnaddr);
15530 /* Static functions and indirect calls don't need the pic register. */
15531 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15532 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15533 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15534 use_reg (&use, pic_offset_table_rtx);
15537 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15539 rtx al = gen_rtx_REG (QImode, 0);
15540 emit_move_insn (al, callarg2);
15541 use_reg (&use, al);
15544 if (ix86_cmodel == CM_LARGE_PIC
15545 && GET_CODE (fnaddr) == MEM
15546 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15547 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15548 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15549 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15551 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15552 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15554 if (sibcall && TARGET_64BIT
15555 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15558 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15559 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15560 emit_move_insn (fnaddr, addr);
15561 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15564 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15566 call = gen_rtx_SET (VOIDmode, retval, call);
15569 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15570 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15571 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15574 call = emit_call_insn (call);
15576 CALL_INSN_FUNCTION_USAGE (call) = use;
15580 /* Clear stack slot assignments remembered from previous functions.
15581 This is called from INIT_EXPANDERS once before RTL is emitted for each
15584 static struct machine_function *
15585 ix86_init_machine_status (void)
15587 struct machine_function *f;
15589 f = GGC_CNEW (struct machine_function);
15590 f->use_fast_prologue_epilogue_nregs = -1;
15591 f->tls_descriptor_call_expanded_p = 0;
15596 /* Return a MEM corresponding to a stack slot with mode MODE.
15597 Allocate a new slot if necessary.
15599 The RTL for a function can have several slots available: N is
15600 which slot to use. */
15603 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15605 struct stack_local_entry *s;
15607 gcc_assert (n < MAX_386_STACK_LOCALS);
15609 for (s = ix86_stack_locals; s; s = s->next)
15610 if (s->mode == mode && s->n == n)
15611 return copy_rtx (s->rtl);
15613 s = (struct stack_local_entry *)
15614 ggc_alloc (sizeof (struct stack_local_entry));
15617 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15619 s->next = ix86_stack_locals;
15620 ix86_stack_locals = s;
15624 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15626 static GTY(()) rtx ix86_tls_symbol;
15628 ix86_tls_get_addr (void)
15631 if (!ix86_tls_symbol)
15633 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15634 (TARGET_ANY_GNU_TLS
15636 ? "___tls_get_addr"
15637 : "__tls_get_addr");
15640 return ix86_tls_symbol;
15643 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15645 static GTY(()) rtx ix86_tls_module_base_symbol;
15647 ix86_tls_module_base (void)
15650 if (!ix86_tls_module_base_symbol)
15652 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15653 "_TLS_MODULE_BASE_");
15654 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15655 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15658 return ix86_tls_module_base_symbol;
15661 /* Calculate the length of the memory address in the instruction
15662 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15665 memory_address_length (rtx addr)
15667 struct ix86_address parts;
15668 rtx base, index, disp;
15672 if (GET_CODE (addr) == PRE_DEC
15673 || GET_CODE (addr) == POST_INC
15674 || GET_CODE (addr) == PRE_MODIFY
15675 || GET_CODE (addr) == POST_MODIFY)
15678 ok = ix86_decompose_address (addr, &parts);
15681 if (parts.base && GET_CODE (parts.base) == SUBREG)
15682 parts.base = SUBREG_REG (parts.base);
15683 if (parts.index && GET_CODE (parts.index) == SUBREG)
15684 parts.index = SUBREG_REG (parts.index);
15687 index = parts.index;
15692 - esp as the base always wants an index,
15693 - ebp as the base always wants a displacement. */
15695 /* Register Indirect. */
15696 if (base && !index && !disp)
15698 /* esp (for its index) and ebp (for its displacement) need
15699 the two-byte modrm form. */
15700 if (addr == stack_pointer_rtx
15701 || addr == arg_pointer_rtx
15702 || addr == frame_pointer_rtx
15703 || addr == hard_frame_pointer_rtx)
15707 /* Direct Addressing. */
15708 else if (disp && !base && !index)
15713 /* Find the length of the displacement constant. */
15716 if (base && satisfies_constraint_K (disp))
15721 /* ebp always wants a displacement. */
15722 else if (base == hard_frame_pointer_rtx)
15725 /* An index requires the two-byte modrm form.... */
15727 /* ...like esp, which always wants an index. */
15728 || base == stack_pointer_rtx
15729 || base == arg_pointer_rtx
15730 || base == frame_pointer_rtx)
15737 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15738 is set, expect that insn have 8bit immediate alternative. */
15740 ix86_attr_length_immediate_default (rtx insn, int shortform)
15744 extract_insn_cached (insn);
15745 for (i = recog_data.n_operands - 1; i >= 0; --i)
15746 if (CONSTANT_P (recog_data.operand[i]))
15749 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15753 switch (get_attr_mode (insn))
15764 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15769 fatal_insn ("unknown insn mode", insn);
15775 /* Compute default value for "length_address" attribute. */
15777 ix86_attr_length_address_default (rtx insn)
15781 if (get_attr_type (insn) == TYPE_LEA)
15783 rtx set = PATTERN (insn);
15785 if (GET_CODE (set) == PARALLEL)
15786 set = XVECEXP (set, 0, 0);
15788 gcc_assert (GET_CODE (set) == SET);
15790 return memory_address_length (SET_SRC (set));
15793 extract_insn_cached (insn);
15794 for (i = recog_data.n_operands - 1; i >= 0; --i)
15795 if (MEM_P (recog_data.operand[i]))
15797 return memory_address_length (XEXP (recog_data.operand[i], 0));
15803 /* Return the maximum number of instructions a cpu can issue. */
15806 ix86_issue_rate (void)
15810 case PROCESSOR_PENTIUM:
15814 case PROCESSOR_PENTIUMPRO:
15815 case PROCESSOR_PENTIUM4:
15816 case PROCESSOR_ATHLON:
15818 case PROCESSOR_AMDFAM10:
15819 case PROCESSOR_NOCONA:
15820 case PROCESSOR_GENERIC32:
15821 case PROCESSOR_GENERIC64:
15824 case PROCESSOR_CORE2:
15832 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15833 by DEP_INSN and nothing set by DEP_INSN. */
15836 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15840 /* Simplify the test for uninteresting insns. */
15841 if (insn_type != TYPE_SETCC
15842 && insn_type != TYPE_ICMOV
15843 && insn_type != TYPE_FCMOV
15844 && insn_type != TYPE_IBR)
15847 if ((set = single_set (dep_insn)) != 0)
15849 set = SET_DEST (set);
15852 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15853 && XVECLEN (PATTERN (dep_insn), 0) == 2
15854 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15855 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15857 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15858 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15863 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15866 /* This test is true if the dependent insn reads the flags but
15867 not any other potentially set register. */
15868 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15871 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15877 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15878 address with operands set by DEP_INSN. */
15881 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15885 if (insn_type == TYPE_LEA
15888 addr = PATTERN (insn);
15890 if (GET_CODE (addr) == PARALLEL)
15891 addr = XVECEXP (addr, 0, 0);
15893 gcc_assert (GET_CODE (addr) == SET);
15895 addr = SET_SRC (addr);
15900 extract_insn_cached (insn);
15901 for (i = recog_data.n_operands - 1; i >= 0; --i)
15902 if (MEM_P (recog_data.operand[i]))
15904 addr = XEXP (recog_data.operand[i], 0);
15911 return modified_in_p (addr, dep_insn);
15915 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15917 enum attr_type insn_type, dep_insn_type;
15918 enum attr_memory memory;
15920 int dep_insn_code_number;
15922 /* Anti and output dependencies have zero cost on all CPUs. */
15923 if (REG_NOTE_KIND (link) != 0)
15926 dep_insn_code_number = recog_memoized (dep_insn);
15928 /* If we can't recognize the insns, we can't really do anything. */
15929 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15932 insn_type = get_attr_type (insn);
15933 dep_insn_type = get_attr_type (dep_insn);
15937 case PROCESSOR_PENTIUM:
15938 /* Address Generation Interlock adds a cycle of latency. */
15939 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15942 /* ??? Compares pair with jump/setcc. */
15943 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15946 /* Floating point stores require value to be ready one cycle earlier. */
15947 if (insn_type == TYPE_FMOV
15948 && get_attr_memory (insn) == MEMORY_STORE
15949 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15953 case PROCESSOR_PENTIUMPRO:
15954 memory = get_attr_memory (insn);
15956 /* INT->FP conversion is expensive. */
15957 if (get_attr_fp_int_src (dep_insn))
15960 /* There is one cycle extra latency between an FP op and a store. */
15961 if (insn_type == TYPE_FMOV
15962 && (set = single_set (dep_insn)) != NULL_RTX
15963 && (set2 = single_set (insn)) != NULL_RTX
15964 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15965 && MEM_P (SET_DEST (set2)))
15968 /* Show ability of reorder buffer to hide latency of load by executing
15969 in parallel with previous instruction in case
15970 previous instruction is not needed to compute the address. */
15971 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15972 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15974 /* Claim moves to take one cycle, as core can issue one load
15975 at time and the next load can start cycle later. */
15976 if (dep_insn_type == TYPE_IMOV
15977 || dep_insn_type == TYPE_FMOV)
15985 memory = get_attr_memory (insn);
15987 /* The esp dependency is resolved before the instruction is really
15989 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15990 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15993 /* INT->FP conversion is expensive. */
15994 if (get_attr_fp_int_src (dep_insn))
15997 /* Show ability of reorder buffer to hide latency of load by executing
15998 in parallel with previous instruction in case
15999 previous instruction is not needed to compute the address. */
16000 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16001 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16003 /* Claim moves to take one cycle, as core can issue one load
16004 at time and the next load can start cycle later. */
16005 if (dep_insn_type == TYPE_IMOV
16006 || dep_insn_type == TYPE_FMOV)
16015 case PROCESSOR_ATHLON:
16017 case PROCESSOR_AMDFAM10:
16018 case PROCESSOR_GENERIC32:
16019 case PROCESSOR_GENERIC64:
16020 memory = get_attr_memory (insn);
16022 /* Show ability of reorder buffer to hide latency of load by executing
16023 in parallel with previous instruction in case
16024 previous instruction is not needed to compute the address. */
16025 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16026 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16028 enum attr_unit unit = get_attr_unit (insn);
16031 /* Because of the difference between the length of integer and
16032 floating unit pipeline preparation stages, the memory operands
16033 for floating point are cheaper.
16035 ??? For Athlon it the difference is most probably 2. */
16036 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16039 loadcost = TARGET_ATHLON ? 2 : 0;
16041 if (cost >= loadcost)
16054 /* How many alternative schedules to try. This should be as wide as the
16055 scheduling freedom in the DFA, but no wider. Making this value too
16056 large results extra work for the scheduler. */
16059 ia32_multipass_dfa_lookahead (void)
16061 if (ix86_tune == PROCESSOR_PENTIUM)
16064 if (ix86_tune == PROCESSOR_PENTIUMPRO
16065 || ix86_tune == PROCESSOR_K6)
16073 /* Compute the alignment given to a constant that is being placed in memory.
16074 EXP is the constant and ALIGN is the alignment that the object would
16076 The value of this function is used instead of that alignment to align
16080 ix86_constant_alignment (tree exp, int align)
16082 if (TREE_CODE (exp) == REAL_CST)
16084 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16086 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16089 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16090 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16091 return BITS_PER_WORD;
16096 /* Compute the alignment for a static variable.
16097 TYPE is the data type, and ALIGN is the alignment that
16098 the object would ordinarily have. The value of this function is used
16099 instead of that alignment to align the object. */
16102 ix86_data_alignment (tree type, int align)
16104 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16106 if (AGGREGATE_TYPE_P (type)
16107 && TYPE_SIZE (type)
16108 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16109 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16110 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16111 && align < max_align)
16114 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16115 to 16byte boundary. */
16118 if (AGGREGATE_TYPE_P (type)
16119 && TYPE_SIZE (type)
16120 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16121 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16122 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16126 if (TREE_CODE (type) == ARRAY_TYPE)
16128 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16130 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16133 else if (TREE_CODE (type) == COMPLEX_TYPE)
16136 if (TYPE_MODE (type) == DCmode && align < 64)
16138 if (TYPE_MODE (type) == XCmode && align < 128)
16141 else if ((TREE_CODE (type) == RECORD_TYPE
16142 || TREE_CODE (type) == UNION_TYPE
16143 || TREE_CODE (type) == QUAL_UNION_TYPE)
16144 && TYPE_FIELDS (type))
16146 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16148 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16151 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16152 || TREE_CODE (type) == INTEGER_TYPE)
16154 if (TYPE_MODE (type) == DFmode && align < 64)
16156 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16163 /* Compute the alignment for a local variable.
16164 TYPE is the data type, and ALIGN is the alignment that
16165 the object would ordinarily have. The value of this macro is used
16166 instead of that alignment to align the object. */
16169 ix86_local_alignment (tree type, int align)
16171 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16172 to 16byte boundary. */
16175 if (AGGREGATE_TYPE_P (type)
16176 && TYPE_SIZE (type)
16177 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16178 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16179 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16182 if (TREE_CODE (type) == ARRAY_TYPE)
16184 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16186 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16189 else if (TREE_CODE (type) == COMPLEX_TYPE)
16191 if (TYPE_MODE (type) == DCmode && align < 64)
16193 if (TYPE_MODE (type) == XCmode && align < 128)
16196 else if ((TREE_CODE (type) == RECORD_TYPE
16197 || TREE_CODE (type) == UNION_TYPE
16198 || TREE_CODE (type) == QUAL_UNION_TYPE)
16199 && TYPE_FIELDS (type))
16201 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16203 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16206 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16207 || TREE_CODE (type) == INTEGER_TYPE)
16210 if (TYPE_MODE (type) == DFmode && align < 64)
16212 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16218 /* Emit RTL insns to initialize the variable parts of a trampoline.
16219 FNADDR is an RTX for the address of the function's pure code.
16220 CXT is an RTX for the static chain value for the function. */
16222 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16226 /* Compute offset from the end of the jmp to the target function. */
16227 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16228 plus_constant (tramp, 10),
16229 NULL_RTX, 1, OPTAB_DIRECT);
16230 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16231 gen_int_mode (0xb9, QImode));
16232 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16233 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16234 gen_int_mode (0xe9, QImode));
16235 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16240 /* Try to load address using shorter movl instead of movabs.
16241 We may want to support movq for kernel mode, but kernel does not use
16242 trampolines at the moment. */
16243 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16245 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16246 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16247 gen_int_mode (0xbb41, HImode));
16248 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16249 gen_lowpart (SImode, fnaddr));
16254 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16255 gen_int_mode (0xbb49, HImode));
16256 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16260 /* Load static chain using movabs to r10. */
16261 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16262 gen_int_mode (0xba49, HImode));
16263 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16266 /* Jump to the r11 */
16267 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16268 gen_int_mode (0xff49, HImode));
16269 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16270 gen_int_mode (0xe3, QImode));
16272 gcc_assert (offset <= TRAMPOLINE_SIZE);
16275 #ifdef ENABLE_EXECUTE_STACK
16276 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16277 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16281 /* Codes for all the SSE/MMX builtins. */
16284 IX86_BUILTIN_ADDPS,
16285 IX86_BUILTIN_ADDSS,
16286 IX86_BUILTIN_DIVPS,
16287 IX86_BUILTIN_DIVSS,
16288 IX86_BUILTIN_MULPS,
16289 IX86_BUILTIN_MULSS,
16290 IX86_BUILTIN_SUBPS,
16291 IX86_BUILTIN_SUBSS,
16293 IX86_BUILTIN_CMPEQPS,
16294 IX86_BUILTIN_CMPLTPS,
16295 IX86_BUILTIN_CMPLEPS,
16296 IX86_BUILTIN_CMPGTPS,
16297 IX86_BUILTIN_CMPGEPS,
16298 IX86_BUILTIN_CMPNEQPS,
16299 IX86_BUILTIN_CMPNLTPS,
16300 IX86_BUILTIN_CMPNLEPS,
16301 IX86_BUILTIN_CMPNGTPS,
16302 IX86_BUILTIN_CMPNGEPS,
16303 IX86_BUILTIN_CMPORDPS,
16304 IX86_BUILTIN_CMPUNORDPS,
16305 IX86_BUILTIN_CMPEQSS,
16306 IX86_BUILTIN_CMPLTSS,
16307 IX86_BUILTIN_CMPLESS,
16308 IX86_BUILTIN_CMPNEQSS,
16309 IX86_BUILTIN_CMPNLTSS,
16310 IX86_BUILTIN_CMPNLESS,
16311 IX86_BUILTIN_CMPNGTSS,
16312 IX86_BUILTIN_CMPNGESS,
16313 IX86_BUILTIN_CMPORDSS,
16314 IX86_BUILTIN_CMPUNORDSS,
16316 IX86_BUILTIN_COMIEQSS,
16317 IX86_BUILTIN_COMILTSS,
16318 IX86_BUILTIN_COMILESS,
16319 IX86_BUILTIN_COMIGTSS,
16320 IX86_BUILTIN_COMIGESS,
16321 IX86_BUILTIN_COMINEQSS,
16322 IX86_BUILTIN_UCOMIEQSS,
16323 IX86_BUILTIN_UCOMILTSS,
16324 IX86_BUILTIN_UCOMILESS,
16325 IX86_BUILTIN_UCOMIGTSS,
16326 IX86_BUILTIN_UCOMIGESS,
16327 IX86_BUILTIN_UCOMINEQSS,
16329 IX86_BUILTIN_CVTPI2PS,
16330 IX86_BUILTIN_CVTPS2PI,
16331 IX86_BUILTIN_CVTSI2SS,
16332 IX86_BUILTIN_CVTSI642SS,
16333 IX86_BUILTIN_CVTSS2SI,
16334 IX86_BUILTIN_CVTSS2SI64,
16335 IX86_BUILTIN_CVTTPS2PI,
16336 IX86_BUILTIN_CVTTSS2SI,
16337 IX86_BUILTIN_CVTTSS2SI64,
16339 IX86_BUILTIN_MAXPS,
16340 IX86_BUILTIN_MAXSS,
16341 IX86_BUILTIN_MINPS,
16342 IX86_BUILTIN_MINSS,
16344 IX86_BUILTIN_LOADUPS,
16345 IX86_BUILTIN_STOREUPS,
16346 IX86_BUILTIN_MOVSS,
16348 IX86_BUILTIN_MOVHLPS,
16349 IX86_BUILTIN_MOVLHPS,
16350 IX86_BUILTIN_LOADHPS,
16351 IX86_BUILTIN_LOADLPS,
16352 IX86_BUILTIN_STOREHPS,
16353 IX86_BUILTIN_STORELPS,
16355 IX86_BUILTIN_MASKMOVQ,
16356 IX86_BUILTIN_MOVMSKPS,
16357 IX86_BUILTIN_PMOVMSKB,
16359 IX86_BUILTIN_MOVNTPS,
16360 IX86_BUILTIN_MOVNTQ,
16362 IX86_BUILTIN_LOADDQU,
16363 IX86_BUILTIN_STOREDQU,
16365 IX86_BUILTIN_PACKSSWB,
16366 IX86_BUILTIN_PACKSSDW,
16367 IX86_BUILTIN_PACKUSWB,
16369 IX86_BUILTIN_PADDB,
16370 IX86_BUILTIN_PADDW,
16371 IX86_BUILTIN_PADDD,
16372 IX86_BUILTIN_PADDQ,
16373 IX86_BUILTIN_PADDSB,
16374 IX86_BUILTIN_PADDSW,
16375 IX86_BUILTIN_PADDUSB,
16376 IX86_BUILTIN_PADDUSW,
16377 IX86_BUILTIN_PSUBB,
16378 IX86_BUILTIN_PSUBW,
16379 IX86_BUILTIN_PSUBD,
16380 IX86_BUILTIN_PSUBQ,
16381 IX86_BUILTIN_PSUBSB,
16382 IX86_BUILTIN_PSUBSW,
16383 IX86_BUILTIN_PSUBUSB,
16384 IX86_BUILTIN_PSUBUSW,
16387 IX86_BUILTIN_PANDN,
16391 IX86_BUILTIN_PAVGB,
16392 IX86_BUILTIN_PAVGW,
16394 IX86_BUILTIN_PCMPEQB,
16395 IX86_BUILTIN_PCMPEQW,
16396 IX86_BUILTIN_PCMPEQD,
16397 IX86_BUILTIN_PCMPGTB,
16398 IX86_BUILTIN_PCMPGTW,
16399 IX86_BUILTIN_PCMPGTD,
16401 IX86_BUILTIN_PMADDWD,
16403 IX86_BUILTIN_PMAXSW,
16404 IX86_BUILTIN_PMAXUB,
16405 IX86_BUILTIN_PMINSW,
16406 IX86_BUILTIN_PMINUB,
16408 IX86_BUILTIN_PMULHUW,
16409 IX86_BUILTIN_PMULHW,
16410 IX86_BUILTIN_PMULLW,
16412 IX86_BUILTIN_PSADBW,
16413 IX86_BUILTIN_PSHUFW,
16415 IX86_BUILTIN_PSLLW,
16416 IX86_BUILTIN_PSLLD,
16417 IX86_BUILTIN_PSLLQ,
16418 IX86_BUILTIN_PSRAW,
16419 IX86_BUILTIN_PSRAD,
16420 IX86_BUILTIN_PSRLW,
16421 IX86_BUILTIN_PSRLD,
16422 IX86_BUILTIN_PSRLQ,
16423 IX86_BUILTIN_PSLLWI,
16424 IX86_BUILTIN_PSLLDI,
16425 IX86_BUILTIN_PSLLQI,
16426 IX86_BUILTIN_PSRAWI,
16427 IX86_BUILTIN_PSRADI,
16428 IX86_BUILTIN_PSRLWI,
16429 IX86_BUILTIN_PSRLDI,
16430 IX86_BUILTIN_PSRLQI,
16432 IX86_BUILTIN_PUNPCKHBW,
16433 IX86_BUILTIN_PUNPCKHWD,
16434 IX86_BUILTIN_PUNPCKHDQ,
16435 IX86_BUILTIN_PUNPCKLBW,
16436 IX86_BUILTIN_PUNPCKLWD,
16437 IX86_BUILTIN_PUNPCKLDQ,
16439 IX86_BUILTIN_SHUFPS,
16441 IX86_BUILTIN_RCPPS,
16442 IX86_BUILTIN_RCPSS,
16443 IX86_BUILTIN_RSQRTPS,
16444 IX86_BUILTIN_RSQRTSS,
16445 IX86_BUILTIN_SQRTPS,
16446 IX86_BUILTIN_SQRTSS,
16448 IX86_BUILTIN_UNPCKHPS,
16449 IX86_BUILTIN_UNPCKLPS,
16451 IX86_BUILTIN_ANDPS,
16452 IX86_BUILTIN_ANDNPS,
16454 IX86_BUILTIN_XORPS,
16457 IX86_BUILTIN_LDMXCSR,
16458 IX86_BUILTIN_STMXCSR,
16459 IX86_BUILTIN_SFENCE,
16461 /* 3DNow! Original */
16462 IX86_BUILTIN_FEMMS,
16463 IX86_BUILTIN_PAVGUSB,
16464 IX86_BUILTIN_PF2ID,
16465 IX86_BUILTIN_PFACC,
16466 IX86_BUILTIN_PFADD,
16467 IX86_BUILTIN_PFCMPEQ,
16468 IX86_BUILTIN_PFCMPGE,
16469 IX86_BUILTIN_PFCMPGT,
16470 IX86_BUILTIN_PFMAX,
16471 IX86_BUILTIN_PFMIN,
16472 IX86_BUILTIN_PFMUL,
16473 IX86_BUILTIN_PFRCP,
16474 IX86_BUILTIN_PFRCPIT1,
16475 IX86_BUILTIN_PFRCPIT2,
16476 IX86_BUILTIN_PFRSQIT1,
16477 IX86_BUILTIN_PFRSQRT,
16478 IX86_BUILTIN_PFSUB,
16479 IX86_BUILTIN_PFSUBR,
16480 IX86_BUILTIN_PI2FD,
16481 IX86_BUILTIN_PMULHRW,
16483 /* 3DNow! Athlon Extensions */
16484 IX86_BUILTIN_PF2IW,
16485 IX86_BUILTIN_PFNACC,
16486 IX86_BUILTIN_PFPNACC,
16487 IX86_BUILTIN_PI2FW,
16488 IX86_BUILTIN_PSWAPDSI,
16489 IX86_BUILTIN_PSWAPDSF,
16492 IX86_BUILTIN_ADDPD,
16493 IX86_BUILTIN_ADDSD,
16494 IX86_BUILTIN_DIVPD,
16495 IX86_BUILTIN_DIVSD,
16496 IX86_BUILTIN_MULPD,
16497 IX86_BUILTIN_MULSD,
16498 IX86_BUILTIN_SUBPD,
16499 IX86_BUILTIN_SUBSD,
16501 IX86_BUILTIN_CMPEQPD,
16502 IX86_BUILTIN_CMPLTPD,
16503 IX86_BUILTIN_CMPLEPD,
16504 IX86_BUILTIN_CMPGTPD,
16505 IX86_BUILTIN_CMPGEPD,
16506 IX86_BUILTIN_CMPNEQPD,
16507 IX86_BUILTIN_CMPNLTPD,
16508 IX86_BUILTIN_CMPNLEPD,
16509 IX86_BUILTIN_CMPNGTPD,
16510 IX86_BUILTIN_CMPNGEPD,
16511 IX86_BUILTIN_CMPORDPD,
16512 IX86_BUILTIN_CMPUNORDPD,
16513 IX86_BUILTIN_CMPEQSD,
16514 IX86_BUILTIN_CMPLTSD,
16515 IX86_BUILTIN_CMPLESD,
16516 IX86_BUILTIN_CMPNEQSD,
16517 IX86_BUILTIN_CMPNLTSD,
16518 IX86_BUILTIN_CMPNLESD,
16519 IX86_BUILTIN_CMPORDSD,
16520 IX86_BUILTIN_CMPUNORDSD,
16522 IX86_BUILTIN_COMIEQSD,
16523 IX86_BUILTIN_COMILTSD,
16524 IX86_BUILTIN_COMILESD,
16525 IX86_BUILTIN_COMIGTSD,
16526 IX86_BUILTIN_COMIGESD,
16527 IX86_BUILTIN_COMINEQSD,
16528 IX86_BUILTIN_UCOMIEQSD,
16529 IX86_BUILTIN_UCOMILTSD,
16530 IX86_BUILTIN_UCOMILESD,
16531 IX86_BUILTIN_UCOMIGTSD,
16532 IX86_BUILTIN_UCOMIGESD,
16533 IX86_BUILTIN_UCOMINEQSD,
16535 IX86_BUILTIN_MAXPD,
16536 IX86_BUILTIN_MAXSD,
16537 IX86_BUILTIN_MINPD,
16538 IX86_BUILTIN_MINSD,
16540 IX86_BUILTIN_ANDPD,
16541 IX86_BUILTIN_ANDNPD,
16543 IX86_BUILTIN_XORPD,
16545 IX86_BUILTIN_SQRTPD,
16546 IX86_BUILTIN_SQRTSD,
16548 IX86_BUILTIN_UNPCKHPD,
16549 IX86_BUILTIN_UNPCKLPD,
16551 IX86_BUILTIN_SHUFPD,
16553 IX86_BUILTIN_LOADUPD,
16554 IX86_BUILTIN_STOREUPD,
16555 IX86_BUILTIN_MOVSD,
16557 IX86_BUILTIN_LOADHPD,
16558 IX86_BUILTIN_LOADLPD,
16560 IX86_BUILTIN_CVTDQ2PD,
16561 IX86_BUILTIN_CVTDQ2PS,
16563 IX86_BUILTIN_CVTPD2DQ,
16564 IX86_BUILTIN_CVTPD2PI,
16565 IX86_BUILTIN_CVTPD2PS,
16566 IX86_BUILTIN_CVTTPD2DQ,
16567 IX86_BUILTIN_CVTTPD2PI,
16569 IX86_BUILTIN_CVTPI2PD,
16570 IX86_BUILTIN_CVTSI2SD,
16571 IX86_BUILTIN_CVTSI642SD,
16573 IX86_BUILTIN_CVTSD2SI,
16574 IX86_BUILTIN_CVTSD2SI64,
16575 IX86_BUILTIN_CVTSD2SS,
16576 IX86_BUILTIN_CVTSS2SD,
16577 IX86_BUILTIN_CVTTSD2SI,
16578 IX86_BUILTIN_CVTTSD2SI64,
16580 IX86_BUILTIN_CVTPS2DQ,
16581 IX86_BUILTIN_CVTPS2PD,
16582 IX86_BUILTIN_CVTTPS2DQ,
16584 IX86_BUILTIN_MOVNTI,
16585 IX86_BUILTIN_MOVNTPD,
16586 IX86_BUILTIN_MOVNTDQ,
16589 IX86_BUILTIN_MASKMOVDQU,
16590 IX86_BUILTIN_MOVMSKPD,
16591 IX86_BUILTIN_PMOVMSKB128,
16593 IX86_BUILTIN_PACKSSWB128,
16594 IX86_BUILTIN_PACKSSDW128,
16595 IX86_BUILTIN_PACKUSWB128,
16597 IX86_BUILTIN_PADDB128,
16598 IX86_BUILTIN_PADDW128,
16599 IX86_BUILTIN_PADDD128,
16600 IX86_BUILTIN_PADDQ128,
16601 IX86_BUILTIN_PADDSB128,
16602 IX86_BUILTIN_PADDSW128,
16603 IX86_BUILTIN_PADDUSB128,
16604 IX86_BUILTIN_PADDUSW128,
16605 IX86_BUILTIN_PSUBB128,
16606 IX86_BUILTIN_PSUBW128,
16607 IX86_BUILTIN_PSUBD128,
16608 IX86_BUILTIN_PSUBQ128,
16609 IX86_BUILTIN_PSUBSB128,
16610 IX86_BUILTIN_PSUBSW128,
16611 IX86_BUILTIN_PSUBUSB128,
16612 IX86_BUILTIN_PSUBUSW128,
16614 IX86_BUILTIN_PAND128,
16615 IX86_BUILTIN_PANDN128,
16616 IX86_BUILTIN_POR128,
16617 IX86_BUILTIN_PXOR128,
16619 IX86_BUILTIN_PAVGB128,
16620 IX86_BUILTIN_PAVGW128,
16622 IX86_BUILTIN_PCMPEQB128,
16623 IX86_BUILTIN_PCMPEQW128,
16624 IX86_BUILTIN_PCMPEQD128,
16625 IX86_BUILTIN_PCMPGTB128,
16626 IX86_BUILTIN_PCMPGTW128,
16627 IX86_BUILTIN_PCMPGTD128,
16629 IX86_BUILTIN_PMADDWD128,
16631 IX86_BUILTIN_PMAXSW128,
16632 IX86_BUILTIN_PMAXUB128,
16633 IX86_BUILTIN_PMINSW128,
16634 IX86_BUILTIN_PMINUB128,
16636 IX86_BUILTIN_PMULUDQ,
16637 IX86_BUILTIN_PMULUDQ128,
16638 IX86_BUILTIN_PMULHUW128,
16639 IX86_BUILTIN_PMULHW128,
16640 IX86_BUILTIN_PMULLW128,
16642 IX86_BUILTIN_PSADBW128,
16643 IX86_BUILTIN_PSHUFHW,
16644 IX86_BUILTIN_PSHUFLW,
16645 IX86_BUILTIN_PSHUFD,
16647 IX86_BUILTIN_PSLLDQI128,
16648 IX86_BUILTIN_PSLLWI128,
16649 IX86_BUILTIN_PSLLDI128,
16650 IX86_BUILTIN_PSLLQI128,
16651 IX86_BUILTIN_PSRAWI128,
16652 IX86_BUILTIN_PSRADI128,
16653 IX86_BUILTIN_PSRLDQI128,
16654 IX86_BUILTIN_PSRLWI128,
16655 IX86_BUILTIN_PSRLDI128,
16656 IX86_BUILTIN_PSRLQI128,
16658 IX86_BUILTIN_PSLLDQ128,
16659 IX86_BUILTIN_PSLLW128,
16660 IX86_BUILTIN_PSLLD128,
16661 IX86_BUILTIN_PSLLQ128,
16662 IX86_BUILTIN_PSRAW128,
16663 IX86_BUILTIN_PSRAD128,
16664 IX86_BUILTIN_PSRLW128,
16665 IX86_BUILTIN_PSRLD128,
16666 IX86_BUILTIN_PSRLQ128,
16668 IX86_BUILTIN_PUNPCKHBW128,
16669 IX86_BUILTIN_PUNPCKHWD128,
16670 IX86_BUILTIN_PUNPCKHDQ128,
16671 IX86_BUILTIN_PUNPCKHQDQ128,
16672 IX86_BUILTIN_PUNPCKLBW128,
16673 IX86_BUILTIN_PUNPCKLWD128,
16674 IX86_BUILTIN_PUNPCKLDQ128,
16675 IX86_BUILTIN_PUNPCKLQDQ128,
16677 IX86_BUILTIN_CLFLUSH,
16678 IX86_BUILTIN_MFENCE,
16679 IX86_BUILTIN_LFENCE,
16681 /* Prescott New Instructions. */
16682 IX86_BUILTIN_ADDSUBPS,
16683 IX86_BUILTIN_HADDPS,
16684 IX86_BUILTIN_HSUBPS,
16685 IX86_BUILTIN_MOVSHDUP,
16686 IX86_BUILTIN_MOVSLDUP,
16687 IX86_BUILTIN_ADDSUBPD,
16688 IX86_BUILTIN_HADDPD,
16689 IX86_BUILTIN_HSUBPD,
16690 IX86_BUILTIN_LDDQU,
16692 IX86_BUILTIN_MONITOR,
16693 IX86_BUILTIN_MWAIT,
16696 IX86_BUILTIN_PHADDW,
16697 IX86_BUILTIN_PHADDD,
16698 IX86_BUILTIN_PHADDSW,
16699 IX86_BUILTIN_PHSUBW,
16700 IX86_BUILTIN_PHSUBD,
16701 IX86_BUILTIN_PHSUBSW,
16702 IX86_BUILTIN_PMADDUBSW,
16703 IX86_BUILTIN_PMULHRSW,
16704 IX86_BUILTIN_PSHUFB,
16705 IX86_BUILTIN_PSIGNB,
16706 IX86_BUILTIN_PSIGNW,
16707 IX86_BUILTIN_PSIGND,
16708 IX86_BUILTIN_PALIGNR,
16709 IX86_BUILTIN_PABSB,
16710 IX86_BUILTIN_PABSW,
16711 IX86_BUILTIN_PABSD,
16713 IX86_BUILTIN_PHADDW128,
16714 IX86_BUILTIN_PHADDD128,
16715 IX86_BUILTIN_PHADDSW128,
16716 IX86_BUILTIN_PHSUBW128,
16717 IX86_BUILTIN_PHSUBD128,
16718 IX86_BUILTIN_PHSUBSW128,
16719 IX86_BUILTIN_PMADDUBSW128,
16720 IX86_BUILTIN_PMULHRSW128,
16721 IX86_BUILTIN_PSHUFB128,
16722 IX86_BUILTIN_PSIGNB128,
16723 IX86_BUILTIN_PSIGNW128,
16724 IX86_BUILTIN_PSIGND128,
16725 IX86_BUILTIN_PALIGNR128,
16726 IX86_BUILTIN_PABSB128,
16727 IX86_BUILTIN_PABSW128,
16728 IX86_BUILTIN_PABSD128,
16730 /* AMDFAM10 - SSE4A New Instructions. */
16731 IX86_BUILTIN_MOVNTSD,
16732 IX86_BUILTIN_MOVNTSS,
16733 IX86_BUILTIN_EXTRQI,
16734 IX86_BUILTIN_EXTRQ,
16735 IX86_BUILTIN_INSERTQI,
16736 IX86_BUILTIN_INSERTQ,
16739 IX86_BUILTIN_BLENDPD,
16740 IX86_BUILTIN_BLENDPS,
16741 IX86_BUILTIN_BLENDVPD,
16742 IX86_BUILTIN_BLENDVPS,
16743 IX86_BUILTIN_PBLENDVB128,
16744 IX86_BUILTIN_PBLENDW128,
16749 IX86_BUILTIN_INSERTPS128,
16751 IX86_BUILTIN_MOVNTDQA,
16752 IX86_BUILTIN_MPSADBW128,
16753 IX86_BUILTIN_PACKUSDW128,
16754 IX86_BUILTIN_PCMPEQQ,
16755 IX86_BUILTIN_PHMINPOSUW128,
16757 IX86_BUILTIN_PMAXSB128,
16758 IX86_BUILTIN_PMAXSD128,
16759 IX86_BUILTIN_PMAXUD128,
16760 IX86_BUILTIN_PMAXUW128,
16762 IX86_BUILTIN_PMINSB128,
16763 IX86_BUILTIN_PMINSD128,
16764 IX86_BUILTIN_PMINUD128,
16765 IX86_BUILTIN_PMINUW128,
16767 IX86_BUILTIN_PMOVSXBW128,
16768 IX86_BUILTIN_PMOVSXBD128,
16769 IX86_BUILTIN_PMOVSXBQ128,
16770 IX86_BUILTIN_PMOVSXWD128,
16771 IX86_BUILTIN_PMOVSXWQ128,
16772 IX86_BUILTIN_PMOVSXDQ128,
16774 IX86_BUILTIN_PMOVZXBW128,
16775 IX86_BUILTIN_PMOVZXBD128,
16776 IX86_BUILTIN_PMOVZXBQ128,
16777 IX86_BUILTIN_PMOVZXWD128,
16778 IX86_BUILTIN_PMOVZXWQ128,
16779 IX86_BUILTIN_PMOVZXDQ128,
16781 IX86_BUILTIN_PMULDQ128,
16782 IX86_BUILTIN_PMULLD128,
16784 IX86_BUILTIN_ROUNDPD,
16785 IX86_BUILTIN_ROUNDPS,
16786 IX86_BUILTIN_ROUNDSD,
16787 IX86_BUILTIN_ROUNDSS,
16789 IX86_BUILTIN_PTESTZ,
16790 IX86_BUILTIN_PTESTC,
16791 IX86_BUILTIN_PTESTNZC,
16793 IX86_BUILTIN_VEC_INIT_V2SI,
16794 IX86_BUILTIN_VEC_INIT_V4HI,
16795 IX86_BUILTIN_VEC_INIT_V8QI,
16796 IX86_BUILTIN_VEC_EXT_V2DF,
16797 IX86_BUILTIN_VEC_EXT_V2DI,
16798 IX86_BUILTIN_VEC_EXT_V4SF,
16799 IX86_BUILTIN_VEC_EXT_V4SI,
16800 IX86_BUILTIN_VEC_EXT_V8HI,
16801 IX86_BUILTIN_VEC_EXT_V2SI,
16802 IX86_BUILTIN_VEC_EXT_V4HI,
16803 IX86_BUILTIN_VEC_EXT_V16QI,
16804 IX86_BUILTIN_VEC_SET_V2DI,
16805 IX86_BUILTIN_VEC_SET_V4SF,
16806 IX86_BUILTIN_VEC_SET_V4SI,
16807 IX86_BUILTIN_VEC_SET_V8HI,
16808 IX86_BUILTIN_VEC_SET_V4HI,
16809 IX86_BUILTIN_VEC_SET_V16QI,
16812 IX86_BUILTIN_CRC32QI,
16813 IX86_BUILTIN_CRC32HI,
16814 IX86_BUILTIN_CRC32SI,
16815 IX86_BUILTIN_CRC32DI,
16817 IX86_BUILTIN_PCMPESTRI128,
16818 IX86_BUILTIN_PCMPESTRM128,
16819 IX86_BUILTIN_PCMPESTRA128,
16820 IX86_BUILTIN_PCMPESTRC128,
16821 IX86_BUILTIN_PCMPESTRO128,
16822 IX86_BUILTIN_PCMPESTRS128,
16823 IX86_BUILTIN_PCMPESTRZ128,
16824 IX86_BUILTIN_PCMPISTRI128,
16825 IX86_BUILTIN_PCMPISTRM128,
16826 IX86_BUILTIN_PCMPISTRA128,
16827 IX86_BUILTIN_PCMPISTRC128,
16828 IX86_BUILTIN_PCMPISTRO128,
16829 IX86_BUILTIN_PCMPISTRS128,
16830 IX86_BUILTIN_PCMPISTRZ128,
16832 IX86_BUILTIN_PCMPGTQ,
16834 /* TFmode support builtins. */
16836 IX86_BUILTIN_FABSQ,
16837 IX86_BUILTIN_COPYSIGNQ,
16842 /* Table for the ix86 builtin decls. */
16843 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16845 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
16846 * if the target_flags include one of MASK. Stores the function decl
16847 * in the ix86_builtins array.
16848 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16851 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16853 tree decl = NULL_TREE;
16855 if (mask & ix86_isa_flags
16856 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16858 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16860 ix86_builtins[(int) code] = decl;
16866 /* Like def_builtin, but also marks the function decl "const". */
16869 def_builtin_const (int mask, const char *name, tree type,
16870 enum ix86_builtins code)
16872 tree decl = def_builtin (mask, name, type, code);
16874 TREE_READONLY (decl) = 1;
16878 /* Bits for builtin_description.flag. */
16880 /* Set when we don't support the comparison natively, and should
16881 swap_comparison in order to support it. */
16882 #define BUILTIN_DESC_SWAP_OPERANDS 1
16884 struct builtin_description
16886 const unsigned int mask;
16887 const enum insn_code icode;
16888 const char *const name;
16889 const enum ix86_builtins code;
16890 const enum rtx_code comparison;
16894 static const struct builtin_description bdesc_comi[] =
16896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16910 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16912 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16914 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16915 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16917 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16922 static const struct builtin_description bdesc_ptest[] =
16925 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16926 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16927 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16930 static const struct builtin_description bdesc_pcmpestr[] =
16933 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
16934 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
16935 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
16936 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
16937 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
16938 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
16939 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
16942 static const struct builtin_description bdesc_pcmpistr[] =
16945 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
16946 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
16947 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
16948 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
16949 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
16950 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
16951 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
16954 static const struct builtin_description bdesc_crc32[] =
16957 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
16958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
16959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
16960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
16963 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
16964 static const struct builtin_description bdesc_sse_3arg[] =
16967 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
16968 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
16969 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
16970 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
16971 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
16972 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
16973 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
16974 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
16975 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
16976 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
16977 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
16978 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
16981 static const struct builtin_description bdesc_2arg[] =
16984 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
16985 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
16986 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
16987 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
16988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
16989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
16990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
16991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
16993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
16997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
16998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17016 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17017 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17021 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17023 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17024 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17033 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17034 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17035 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17036 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17037 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17038 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17040 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17044 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17048 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17053 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17058 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17060 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17061 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17067 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17068 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17070 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17071 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17072 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17073 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17075 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17076 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17077 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17078 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17079 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17080 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17083 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17084 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17089 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17092 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17093 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17094 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17095 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17096 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17100 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17103 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17105 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17110 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17114 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17115 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17116 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17117 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17145 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17149 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17151 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17152 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17159 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17160 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17161 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17162 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17163 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17164 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17165 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17166 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17168 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17169 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17170 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17171 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17172 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17173 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17174 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17175 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17177 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17178 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17180 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17182 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17183 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17195 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17196 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17197 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17213 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17219 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17220 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17221 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17223 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17224 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17225 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17228 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17233 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17238 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17239 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17240 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17241 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17242 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17243 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17246 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17247 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17248 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17249 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17250 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17251 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17252 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17253 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17254 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17255 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17256 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17259 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17260 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17261 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17262 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17263 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17264 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17265 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17266 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17267 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17269 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17272 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17273 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17274 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17275 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17276 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17277 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17278 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17279 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17280 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17281 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17283 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17289 static const struct builtin_description bdesc_1arg[] =
17291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17294 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17300 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17303 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17323 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17324 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17331 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17332 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17335 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17336 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17337 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17338 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17339 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17340 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17343 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17344 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17345 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17346 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17347 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17348 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17349 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17350 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17357 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
17358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17362 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17363 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17366 ix86_init_mmx_sse_builtins (void)
17368 const struct builtin_description * d;
17371 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17372 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17373 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17374 tree V2DI_type_node
17375 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17376 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17377 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17378 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17379 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17380 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17381 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17383 tree pchar_type_node = build_pointer_type (char_type_node);
17384 tree pcchar_type_node = build_pointer_type (
17385 build_type_variant (char_type_node, 1, 0));
17386 tree pfloat_type_node = build_pointer_type (float_type_node);
17387 tree pcfloat_type_node = build_pointer_type (
17388 build_type_variant (float_type_node, 1, 0));
17389 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17390 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17391 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17394 tree int_ftype_v4sf_v4sf
17395 = build_function_type_list (integer_type_node,
17396 V4SF_type_node, V4SF_type_node, NULL_TREE);
17397 tree v4si_ftype_v4sf_v4sf
17398 = build_function_type_list (V4SI_type_node,
17399 V4SF_type_node, V4SF_type_node, NULL_TREE);
17400 /* MMX/SSE/integer conversions. */
17401 tree int_ftype_v4sf
17402 = build_function_type_list (integer_type_node,
17403 V4SF_type_node, NULL_TREE);
17404 tree int64_ftype_v4sf
17405 = build_function_type_list (long_long_integer_type_node,
17406 V4SF_type_node, NULL_TREE);
17407 tree int_ftype_v8qi
17408 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17409 tree v4sf_ftype_v4sf_int
17410 = build_function_type_list (V4SF_type_node,
17411 V4SF_type_node, integer_type_node, NULL_TREE);
17412 tree v4sf_ftype_v4sf_int64
17413 = build_function_type_list (V4SF_type_node,
17414 V4SF_type_node, long_long_integer_type_node,
17416 tree v4sf_ftype_v4sf_v2si
17417 = build_function_type_list (V4SF_type_node,
17418 V4SF_type_node, V2SI_type_node, NULL_TREE);
17420 /* Miscellaneous. */
17421 tree v8qi_ftype_v4hi_v4hi
17422 = build_function_type_list (V8QI_type_node,
17423 V4HI_type_node, V4HI_type_node, NULL_TREE);
17424 tree v4hi_ftype_v2si_v2si
17425 = build_function_type_list (V4HI_type_node,
17426 V2SI_type_node, V2SI_type_node, NULL_TREE);
17427 tree v4sf_ftype_v4sf_v4sf_int
17428 = build_function_type_list (V4SF_type_node,
17429 V4SF_type_node, V4SF_type_node,
17430 integer_type_node, NULL_TREE);
17431 tree v2si_ftype_v4hi_v4hi
17432 = build_function_type_list (V2SI_type_node,
17433 V4HI_type_node, V4HI_type_node, NULL_TREE);
17434 tree v4hi_ftype_v4hi_int
17435 = build_function_type_list (V4HI_type_node,
17436 V4HI_type_node, integer_type_node, NULL_TREE);
17437 tree v4hi_ftype_v4hi_di
17438 = build_function_type_list (V4HI_type_node,
17439 V4HI_type_node, long_long_unsigned_type_node,
17441 tree v2si_ftype_v2si_di
17442 = build_function_type_list (V2SI_type_node,
17443 V2SI_type_node, long_long_unsigned_type_node,
17445 tree void_ftype_void
17446 = build_function_type (void_type_node, void_list_node);
17447 tree void_ftype_unsigned
17448 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17449 tree void_ftype_unsigned_unsigned
17450 = build_function_type_list (void_type_node, unsigned_type_node,
17451 unsigned_type_node, NULL_TREE);
17452 tree void_ftype_pcvoid_unsigned_unsigned
17453 = build_function_type_list (void_type_node, const_ptr_type_node,
17454 unsigned_type_node, unsigned_type_node,
17456 tree unsigned_ftype_void
17457 = build_function_type (unsigned_type_node, void_list_node);
17458 tree v2si_ftype_v4sf
17459 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17460 /* Loads/stores. */
17461 tree void_ftype_v8qi_v8qi_pchar
17462 = build_function_type_list (void_type_node,
17463 V8QI_type_node, V8QI_type_node,
17464 pchar_type_node, NULL_TREE);
17465 tree v4sf_ftype_pcfloat
17466 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17467 /* @@@ the type is bogus */
17468 tree v4sf_ftype_v4sf_pv2si
17469 = build_function_type_list (V4SF_type_node,
17470 V4SF_type_node, pv2si_type_node, NULL_TREE);
17471 tree void_ftype_pv2si_v4sf
17472 = build_function_type_list (void_type_node,
17473 pv2si_type_node, V4SF_type_node, NULL_TREE);
17474 tree void_ftype_pfloat_v4sf
17475 = build_function_type_list (void_type_node,
17476 pfloat_type_node, V4SF_type_node, NULL_TREE);
17477 tree void_ftype_pdi_di
17478 = build_function_type_list (void_type_node,
17479 pdi_type_node, long_long_unsigned_type_node,
17481 tree void_ftype_pv2di_v2di
17482 = build_function_type_list (void_type_node,
17483 pv2di_type_node, V2DI_type_node, NULL_TREE);
17484 /* Normal vector unops. */
17485 tree v4sf_ftype_v4sf
17486 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17487 tree v16qi_ftype_v16qi
17488 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17489 tree v8hi_ftype_v8hi
17490 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17491 tree v4si_ftype_v4si
17492 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17493 tree v8qi_ftype_v8qi
17494 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17495 tree v4hi_ftype_v4hi
17496 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17498 /* Normal vector binops. */
17499 tree v4sf_ftype_v4sf_v4sf
17500 = build_function_type_list (V4SF_type_node,
17501 V4SF_type_node, V4SF_type_node, NULL_TREE);
17502 tree v8qi_ftype_v8qi_v8qi
17503 = build_function_type_list (V8QI_type_node,
17504 V8QI_type_node, V8QI_type_node, NULL_TREE);
17505 tree v4hi_ftype_v4hi_v4hi
17506 = build_function_type_list (V4HI_type_node,
17507 V4HI_type_node, V4HI_type_node, NULL_TREE);
17508 tree v2si_ftype_v2si_v2si
17509 = build_function_type_list (V2SI_type_node,
17510 V2SI_type_node, V2SI_type_node, NULL_TREE);
17511 tree di_ftype_di_di
17512 = build_function_type_list (long_long_unsigned_type_node,
17513 long_long_unsigned_type_node,
17514 long_long_unsigned_type_node, NULL_TREE);
17516 tree di_ftype_di_di_int
17517 = build_function_type_list (long_long_unsigned_type_node,
17518 long_long_unsigned_type_node,
17519 long_long_unsigned_type_node,
17520 integer_type_node, NULL_TREE);
17522 tree v2si_ftype_v2sf
17523 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17524 tree v2sf_ftype_v2si
17525 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17526 tree v2si_ftype_v2si
17527 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17528 tree v2sf_ftype_v2sf
17529 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17530 tree v2sf_ftype_v2sf_v2sf
17531 = build_function_type_list (V2SF_type_node,
17532 V2SF_type_node, V2SF_type_node, NULL_TREE);
17533 tree v2si_ftype_v2sf_v2sf
17534 = build_function_type_list (V2SI_type_node,
17535 V2SF_type_node, V2SF_type_node, NULL_TREE);
17536 tree pint_type_node = build_pointer_type (integer_type_node);
17537 tree pdouble_type_node = build_pointer_type (double_type_node);
17538 tree pcdouble_type_node = build_pointer_type (
17539 build_type_variant (double_type_node, 1, 0));
17540 tree int_ftype_v2df_v2df
17541 = build_function_type_list (integer_type_node,
17542 V2DF_type_node, V2DF_type_node, NULL_TREE);
17544 tree void_ftype_pcvoid
17545 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17546 tree v4sf_ftype_v4si
17547 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17548 tree v4si_ftype_v4sf
17549 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17550 tree v2df_ftype_v4si
17551 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17552 tree v4si_ftype_v2df
17553 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17554 tree v2si_ftype_v2df
17555 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17556 tree v4sf_ftype_v2df
17557 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17558 tree v2df_ftype_v2si
17559 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17560 tree v2df_ftype_v4sf
17561 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17562 tree int_ftype_v2df
17563 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17564 tree int64_ftype_v2df
17565 = build_function_type_list (long_long_integer_type_node,
17566 V2DF_type_node, NULL_TREE);
17567 tree v2df_ftype_v2df_int
17568 = build_function_type_list (V2DF_type_node,
17569 V2DF_type_node, integer_type_node, NULL_TREE);
17570 tree v2df_ftype_v2df_int64
17571 = build_function_type_list (V2DF_type_node,
17572 V2DF_type_node, long_long_integer_type_node,
17574 tree v4sf_ftype_v4sf_v2df
17575 = build_function_type_list (V4SF_type_node,
17576 V4SF_type_node, V2DF_type_node, NULL_TREE);
17577 tree v2df_ftype_v2df_v4sf
17578 = build_function_type_list (V2DF_type_node,
17579 V2DF_type_node, V4SF_type_node, NULL_TREE);
17580 tree v2df_ftype_v2df_v2df_int
17581 = build_function_type_list (V2DF_type_node,
17582 V2DF_type_node, V2DF_type_node,
17585 tree v2df_ftype_v2df_pcdouble
17586 = build_function_type_list (V2DF_type_node,
17587 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17588 tree void_ftype_pdouble_v2df
17589 = build_function_type_list (void_type_node,
17590 pdouble_type_node, V2DF_type_node, NULL_TREE);
17591 tree void_ftype_pint_int
17592 = build_function_type_list (void_type_node,
17593 pint_type_node, integer_type_node, NULL_TREE);
17594 tree void_ftype_v16qi_v16qi_pchar
17595 = build_function_type_list (void_type_node,
17596 V16QI_type_node, V16QI_type_node,
17597 pchar_type_node, NULL_TREE);
17598 tree v2df_ftype_pcdouble
17599 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17600 tree v2df_ftype_v2df_v2df
17601 = build_function_type_list (V2DF_type_node,
17602 V2DF_type_node, V2DF_type_node, NULL_TREE);
17603 tree v16qi_ftype_v16qi_v16qi
17604 = build_function_type_list (V16QI_type_node,
17605 V16QI_type_node, V16QI_type_node, NULL_TREE);
17606 tree v8hi_ftype_v8hi_v8hi
17607 = build_function_type_list (V8HI_type_node,
17608 V8HI_type_node, V8HI_type_node, NULL_TREE);
17609 tree v4si_ftype_v4si_v4si
17610 = build_function_type_list (V4SI_type_node,
17611 V4SI_type_node, V4SI_type_node, NULL_TREE);
17612 tree v2di_ftype_v2di_v2di
17613 = build_function_type_list (V2DI_type_node,
17614 V2DI_type_node, V2DI_type_node, NULL_TREE);
17615 tree v2di_ftype_v2df_v2df
17616 = build_function_type_list (V2DI_type_node,
17617 V2DF_type_node, V2DF_type_node, NULL_TREE);
17618 tree v2df_ftype_v2df
17619 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17620 tree v2di_ftype_v2di_int
17621 = build_function_type_list (V2DI_type_node,
17622 V2DI_type_node, integer_type_node, NULL_TREE);
17623 tree v2di_ftype_v2di_v2di_int
17624 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17625 V2DI_type_node, integer_type_node, NULL_TREE);
17626 tree v4si_ftype_v4si_int
17627 = build_function_type_list (V4SI_type_node,
17628 V4SI_type_node, integer_type_node, NULL_TREE);
17629 tree v8hi_ftype_v8hi_int
17630 = build_function_type_list (V8HI_type_node,
17631 V8HI_type_node, integer_type_node, NULL_TREE);
17632 tree v4si_ftype_v8hi_v8hi
17633 = build_function_type_list (V4SI_type_node,
17634 V8HI_type_node, V8HI_type_node, NULL_TREE);
17635 tree di_ftype_v8qi_v8qi
17636 = build_function_type_list (long_long_unsigned_type_node,
17637 V8QI_type_node, V8QI_type_node, NULL_TREE);
17638 tree di_ftype_v2si_v2si
17639 = build_function_type_list (long_long_unsigned_type_node,
17640 V2SI_type_node, V2SI_type_node, NULL_TREE);
17641 tree v2di_ftype_v16qi_v16qi
17642 = build_function_type_list (V2DI_type_node,
17643 V16QI_type_node, V16QI_type_node, NULL_TREE);
17644 tree v2di_ftype_v4si_v4si
17645 = build_function_type_list (V2DI_type_node,
17646 V4SI_type_node, V4SI_type_node, NULL_TREE);
17647 tree int_ftype_v16qi
17648 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17649 tree v16qi_ftype_pcchar
17650 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17651 tree void_ftype_pchar_v16qi
17652 = build_function_type_list (void_type_node,
17653 pchar_type_node, V16QI_type_node, NULL_TREE);
17655 tree v2di_ftype_v2di_unsigned_unsigned
17656 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17657 unsigned_type_node, unsigned_type_node,
17659 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17660 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17661 unsigned_type_node, unsigned_type_node,
17663 tree v2di_ftype_v2di_v16qi
17664 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17666 tree v2df_ftype_v2df_v2df_v2df
17667 = build_function_type_list (V2DF_type_node,
17668 V2DF_type_node, V2DF_type_node,
17669 V2DF_type_node, NULL_TREE);
17670 tree v4sf_ftype_v4sf_v4sf_v4sf
17671 = build_function_type_list (V4SF_type_node,
17672 V4SF_type_node, V4SF_type_node,
17673 V4SF_type_node, NULL_TREE);
17674 tree v8hi_ftype_v16qi
17675 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17677 tree v4si_ftype_v16qi
17678 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17680 tree v2di_ftype_v16qi
17681 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17683 tree v4si_ftype_v8hi
17684 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17686 tree v2di_ftype_v8hi
17687 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17689 tree v2di_ftype_v4si
17690 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17692 tree v2di_ftype_pv2di
17693 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17695 tree v16qi_ftype_v16qi_v16qi_int
17696 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17697 V16QI_type_node, integer_type_node,
17699 tree v16qi_ftype_v16qi_v16qi_v16qi
17700 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17701 V16QI_type_node, V16QI_type_node,
17703 tree v8hi_ftype_v8hi_v8hi_int
17704 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17705 V8HI_type_node, integer_type_node,
17707 tree v4si_ftype_v4si_v4si_int
17708 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17709 V4SI_type_node, integer_type_node,
17711 tree int_ftype_v2di_v2di
17712 = build_function_type_list (integer_type_node,
17713 V2DI_type_node, V2DI_type_node,
17715 tree int_ftype_v16qi_int_v16qi_int_int
17716 = build_function_type_list (integer_type_node,
17723 tree v16qi_ftype_v16qi_int_v16qi_int_int
17724 = build_function_type_list (V16QI_type_node,
17731 tree int_ftype_v16qi_v16qi_int
17732 = build_function_type_list (integer_type_node,
17739 /* The __float80 type. */
17740 if (TYPE_MODE (long_double_type_node) == XFmode)
17741 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17745 /* The __float80 type. */
17746 tree float80_type_node = make_node (REAL_TYPE);
17748 TYPE_PRECISION (float80_type_node) = 80;
17749 layout_type (float80_type_node);
17750 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17756 tree float128_type_node = make_node (REAL_TYPE);
17758 TYPE_PRECISION (float128_type_node) = 128;
17759 layout_type (float128_type_node);
17760 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17763 /* TFmode support builtins. */
17764 ftype = build_function_type (float128_type_node,
17766 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
17768 ftype = build_function_type_list (float128_type_node,
17769 float128_type_node,
17771 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
17773 ftype = build_function_type_list (float128_type_node,
17774 float128_type_node,
17775 float128_type_node,
17777 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
17780 /* Add all SSE builtins that are more or less simple operations on
17782 for (i = 0, d = bdesc_sse_3arg;
17783 i < ARRAY_SIZE (bdesc_sse_3arg);
17786 /* Use one of the operands; the target can have a different mode for
17787 mask-generating compares. */
17788 enum machine_mode mode;
17793 mode = insn_data[d->icode].operand[1].mode;
17798 type = v16qi_ftype_v16qi_v16qi_int;
17801 type = v8hi_ftype_v8hi_v8hi_int;
17804 type = v4si_ftype_v4si_v4si_int;
17807 type = v2di_ftype_v2di_v2di_int;
17810 type = v2df_ftype_v2df_v2df_int;
17813 type = v4sf_ftype_v4sf_v4sf_int;
17816 gcc_unreachable ();
17819 /* Override for variable blends. */
17822 case CODE_FOR_sse4_1_blendvpd:
17823 type = v2df_ftype_v2df_v2df_v2df;
17825 case CODE_FOR_sse4_1_blendvps:
17826 type = v4sf_ftype_v4sf_v4sf_v4sf;
17828 case CODE_FOR_sse4_1_pblendvb:
17829 type = v16qi_ftype_v16qi_v16qi_v16qi;
17835 def_builtin (d->mask, d->name, type, d->code);
17838 /* Add all builtins that are more or less simple operations on two
17840 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17842 /* Use one of the operands; the target can have a different mode for
17843 mask-generating compares. */
17844 enum machine_mode mode;
17849 mode = insn_data[d->icode].operand[1].mode;
17854 type = v16qi_ftype_v16qi_v16qi;
17857 type = v8hi_ftype_v8hi_v8hi;
17860 type = v4si_ftype_v4si_v4si;
17863 type = v2di_ftype_v2di_v2di;
17866 type = v2df_ftype_v2df_v2df;
17869 type = v4sf_ftype_v4sf_v4sf;
17872 type = v8qi_ftype_v8qi_v8qi;
17875 type = v4hi_ftype_v4hi_v4hi;
17878 type = v2si_ftype_v2si_v2si;
17881 type = di_ftype_di_di;
17885 gcc_unreachable ();
17888 /* Override for comparisons. */
17889 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17890 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17891 type = v4si_ftype_v4sf_v4sf;
17893 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17894 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17895 type = v2di_ftype_v2df_v2df;
17897 def_builtin (d->mask, d->name, type, d->code);
17900 /* Add all builtins that are more or less simple operations on 1 operand. */
17901 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17903 enum machine_mode mode;
17908 mode = insn_data[d->icode].operand[1].mode;
17913 type = v16qi_ftype_v16qi;
17916 type = v8hi_ftype_v8hi;
17919 type = v4si_ftype_v4si;
17922 type = v2df_ftype_v2df;
17925 type = v4sf_ftype_v4sf;
17928 type = v8qi_ftype_v8qi;
17931 type = v4hi_ftype_v4hi;
17934 type = v2si_ftype_v2si;
17941 def_builtin (d->mask, d->name, type, d->code);
17944 /* pcmpestr[im] insns. */
17945 for (i = 0, d = bdesc_pcmpestr;
17946 i < ARRAY_SIZE (bdesc_pcmpestr);
17949 if (d->code == IX86_BUILTIN_PCMPESTRM128)
17950 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
17952 ftype = int_ftype_v16qi_int_v16qi_int_int;
17953 def_builtin (d->mask, d->name, ftype, d->code);
17956 /* pcmpistr[im] insns. */
17957 for (i = 0, d = bdesc_pcmpistr;
17958 i < ARRAY_SIZE (bdesc_pcmpistr);
17961 if (d->code == IX86_BUILTIN_PCMPISTRM128)
17962 ftype = v16qi_ftype_v16qi_v16qi_int;
17964 ftype = int_ftype_v16qi_v16qi_int;
17965 def_builtin (d->mask, d->name, ftype, d->code);
17968 /* Add the remaining MMX insns with somewhat more complicated types. */
17969 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17970 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17971 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17972 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17974 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17975 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17976 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17978 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17979 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17981 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17982 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17984 /* comi/ucomi insns. */
17985 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17986 if (d->mask == OPTION_MASK_ISA_SSE2)
17987 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17989 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17992 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
17993 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
17995 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17996 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17997 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17999 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18000 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18001 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18002 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18003 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18004 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18005 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18006 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18007 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18008 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18009 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
18011 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18013 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18014 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18016 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18017 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18018 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18019 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18021 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18022 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18023 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18024 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18026 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18028 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18030 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18031 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18032 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18033 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18034 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18035 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18037 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
18039 /* Original 3DNow! */
18040 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18041 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18042 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18043 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18044 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18045 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18046 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18047 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18048 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18049 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18050 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18051 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18052 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18053 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18054 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18055 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18056 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18057 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18058 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18059 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18061 /* 3DNow! extension as used in the Athlon CPU. */
18062 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18063 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18064 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18065 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18066 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18067 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18070 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18072 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18073 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18075 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18076 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18078 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18079 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18080 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18081 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18082 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18084 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18085 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18086 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18087 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18089 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18090 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18092 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18094 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18095 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18097 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18098 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18099 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18100 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18101 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18103 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18105 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18106 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18107 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18108 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18110 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18111 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18112 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18114 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18115 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18116 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18117 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18119 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18120 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18121 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18123 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18124 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18126 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18127 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18129 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18130 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18131 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18132 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18133 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18134 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18135 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18137 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18138 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18139 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18140 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18141 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18142 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18143 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18145 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18146 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18147 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18148 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18150 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18152 /* Prescott New Instructions. */
18153 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18154 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18155 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18158 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18159 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18162 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18163 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18164 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18165 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18166 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18167 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18168 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18169 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18170 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18171 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18172 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18173 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18174 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18175 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18176 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18177 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18178 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18179 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
18182 ftype = build_function_type_list (unsigned_type_node,
18183 unsigned_type_node,
18184 unsigned_char_type_node,
18186 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18187 ftype = build_function_type_list (unsigned_type_node,
18188 unsigned_type_node,
18189 short_unsigned_type_node,
18191 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18192 ftype = build_function_type_list (unsigned_type_node,
18193 unsigned_type_node,
18194 unsigned_type_node,
18196 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18197 ftype = build_function_type_list (long_long_unsigned_type_node,
18198 long_long_unsigned_type_node,
18199 long_long_unsigned_type_node,
18201 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
18203 /* AMDFAM10 SSE4A New built-ins */
18204 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18205 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18206 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18207 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18208 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18209 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18211 /* Access to the vec_init patterns. */
18212 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18213 integer_type_node, NULL_TREE);
18214 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18216 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18217 short_integer_type_node,
18218 short_integer_type_node,
18219 short_integer_type_node, NULL_TREE);
18220 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18222 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18223 char_type_node, char_type_node,
18224 char_type_node, char_type_node,
18225 char_type_node, char_type_node,
18226 char_type_node, NULL_TREE);
18227 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
18229 /* Access to the vec_extract patterns. */
18230 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18231 integer_type_node, NULL_TREE);
18232 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18234 ftype = build_function_type_list (long_long_integer_type_node,
18235 V2DI_type_node, integer_type_node,
18237 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18239 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18240 integer_type_node, NULL_TREE);
18241 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18243 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18244 integer_type_node, NULL_TREE);
18245 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18247 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18248 integer_type_node, NULL_TREE);
18249 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18251 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18252 integer_type_node, NULL_TREE);
18253 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18255 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18256 integer_type_node, NULL_TREE);
18257 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18259 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18260 integer_type_node, NULL_TREE);
18261 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
18263 /* Access to the vec_set patterns. */
18264 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18266 integer_type_node, NULL_TREE);
18267 def_builtin (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18269 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18271 integer_type_node, NULL_TREE);
18272 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18274 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18276 integer_type_node, NULL_TREE);
18277 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18279 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18281 integer_type_node, NULL_TREE);
18282 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18284 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18286 integer_type_node, NULL_TREE);
18287 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18289 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18291 integer_type_node, NULL_TREE);
18292 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
18296 ix86_init_builtins (void)
18299 ix86_init_mmx_sse_builtins ();
18302 /* Errors in the source file can cause expand_expr to return const0_rtx
18303 where we expect a vector. To avoid crashing, use one of the vector
18304 clear instructions. */
18306 safe_vector_operand (rtx x, enum machine_mode mode)
18308 if (x == const0_rtx)
18309 x = CONST0_RTX (mode);
18313 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18314 4 operands. The third argument must be a constant smaller than 8
18318 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18322 tree arg0 = CALL_EXPR_ARG (exp, 0);
18323 tree arg1 = CALL_EXPR_ARG (exp, 1);
18324 tree arg2 = CALL_EXPR_ARG (exp, 2);
18325 rtx op0 = expand_normal (arg0);
18326 rtx op1 = expand_normal (arg1);
18327 rtx op2 = expand_normal (arg2);
18328 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18329 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18330 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18331 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
18333 if (VECTOR_MODE_P (mode1))
18334 op0 = safe_vector_operand (op0, mode1);
18335 if (VECTOR_MODE_P (mode2))
18336 op1 = safe_vector_operand (op1, mode2);
18337 if (VECTOR_MODE_P (mode3))
18338 op2 = safe_vector_operand (op2, mode3);
18342 || GET_MODE (target) != tmode
18343 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18344 target = gen_reg_rtx (tmode);
18346 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18347 op0 = copy_to_mode_reg (mode1, op0);
18348 if ((optimize && !register_operand (op1, mode2))
18349 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18350 op1 = copy_to_mode_reg (mode2, op1);
18352 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18355 case CODE_FOR_sse4_1_blendvpd:
18356 case CODE_FOR_sse4_1_blendvps:
18357 case CODE_FOR_sse4_1_pblendvb:
18358 op2 = copy_to_mode_reg (mode3, op2);
18361 case CODE_FOR_sse4_1_roundsd:
18362 case CODE_FOR_sse4_1_roundss:
18363 error ("the third argument must be a 4-bit immediate");
18367 error ("the third argument must be an 8-bit immediate");
18371 pat = GEN_FCN (icode) (target, op0, op1, op2);
18378 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18381 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18384 tree arg0 = CALL_EXPR_ARG (exp, 0);
18385 tree arg1 = CALL_EXPR_ARG (exp, 1);
18386 rtx op0 = expand_normal (arg0);
18387 rtx op1 = expand_normal (arg1);
18388 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18389 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18390 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18394 || GET_MODE (target) != tmode
18395 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18396 target = gen_reg_rtx (tmode);
18398 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18399 op0 = copy_to_mode_reg (mode0, op0);
18400 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18402 op1 = copy_to_reg (op1);
18403 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18406 pat = GEN_FCN (icode) (target, op0, op1);
18413 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18416 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18419 tree arg0 = CALL_EXPR_ARG (exp, 0);
18420 tree arg1 = CALL_EXPR_ARG (exp, 1);
18421 rtx op0 = expand_normal (arg0);
18422 rtx op1 = expand_normal (arg1);
18423 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18424 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18425 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18427 if (VECTOR_MODE_P (mode0))
18428 op0 = safe_vector_operand (op0, mode0);
18429 if (VECTOR_MODE_P (mode1))
18430 op1 = safe_vector_operand (op1, mode1);
18432 if (optimize || !target
18433 || GET_MODE (target) != tmode
18434 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18435 target = gen_reg_rtx (tmode);
18437 if (GET_MODE (op1) == SImode && mode1 == TImode)
18439 rtx x = gen_reg_rtx (V4SImode);
18440 emit_insn (gen_sse2_loadd (x, op1));
18441 op1 = gen_lowpart (TImode, x);
18444 /* The insn must want input operands in the same modes as the
18446 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
18447 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
18449 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18450 op0 = copy_to_mode_reg (mode0, op0);
18451 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18452 op1 = copy_to_mode_reg (mode1, op1);
18454 /* ??? Using ix86_fixup_binary_operands is problematic when
18455 we've got mismatched modes. Fake it. */
18461 if (tmode == mode0 && tmode == mode1)
18463 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18467 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18469 op0 = force_reg (mode0, op0);
18470 op1 = force_reg (mode1, op1);
18471 target = gen_reg_rtx (tmode);
18474 pat = GEN_FCN (icode) (target, op0, op1);
18481 /* Subroutine of ix86_expand_builtin to take care of stores. */
18484 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18487 tree arg0 = CALL_EXPR_ARG (exp, 0);
18488 tree arg1 = CALL_EXPR_ARG (exp, 1);
18489 rtx op0 = expand_normal (arg0);
18490 rtx op1 = expand_normal (arg1);
18491 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18492 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18494 if (VECTOR_MODE_P (mode1))
18495 op1 = safe_vector_operand (op1, mode1);
18497 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18498 op1 = copy_to_mode_reg (mode1, op1);
18500 pat = GEN_FCN (icode) (op0, op1);
18506 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18509 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18510 rtx target, int do_load)
18513 tree arg0 = CALL_EXPR_ARG (exp, 0);
18514 rtx op0 = expand_normal (arg0);
18515 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18516 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18518 if (optimize || !target
18519 || GET_MODE (target) != tmode
18520 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18521 target = gen_reg_rtx (tmode);
18523 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18526 if (VECTOR_MODE_P (mode0))
18527 op0 = safe_vector_operand (op0, mode0);
18529 if ((optimize && !register_operand (op0, mode0))
18530 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18531 op0 = copy_to_mode_reg (mode0, op0);
18536 case CODE_FOR_sse4_1_roundpd:
18537 case CODE_FOR_sse4_1_roundps:
18539 tree arg1 = CALL_EXPR_ARG (exp, 1);
18540 rtx op1 = expand_normal (arg1);
18541 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18543 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18545 error ("the second argument must be a 4-bit immediate");
18548 pat = GEN_FCN (icode) (target, op0, op1);
18552 pat = GEN_FCN (icode) (target, op0);
18562 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18563 sqrtss, rsqrtss, rcpss. */
18566 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18569 tree arg0 = CALL_EXPR_ARG (exp, 0);
18570 rtx op1, op0 = expand_normal (arg0);
18571 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18572 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18574 if (optimize || !target
18575 || GET_MODE (target) != tmode
18576 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18577 target = gen_reg_rtx (tmode);
18579 if (VECTOR_MODE_P (mode0))
18580 op0 = safe_vector_operand (op0, mode0);
18582 if ((optimize && !register_operand (op0, mode0))
18583 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18584 op0 = copy_to_mode_reg (mode0, op0);
18587 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18588 op1 = copy_to_mode_reg (mode0, op1);
18590 pat = GEN_FCN (icode) (target, op0, op1);
18597 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18600 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18604 tree arg0 = CALL_EXPR_ARG (exp, 0);
18605 tree arg1 = CALL_EXPR_ARG (exp, 1);
18606 rtx op0 = expand_normal (arg0);
18607 rtx op1 = expand_normal (arg1);
18609 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18610 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18611 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18612 enum rtx_code comparison = d->comparison;
18614 if (VECTOR_MODE_P (mode0))
18615 op0 = safe_vector_operand (op0, mode0);
18616 if (VECTOR_MODE_P (mode1))
18617 op1 = safe_vector_operand (op1, mode1);
18619 /* Swap operands if we have a comparison that isn't available in
18621 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18623 rtx tmp = gen_reg_rtx (mode1);
18624 emit_move_insn (tmp, op1);
18629 if (optimize || !target
18630 || GET_MODE (target) != tmode
18631 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18632 target = gen_reg_rtx (tmode);
18634 if ((optimize && !register_operand (op0, mode0))
18635 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18636 op0 = copy_to_mode_reg (mode0, op0);
18637 if ((optimize && !register_operand (op1, mode1))
18638 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18639 op1 = copy_to_mode_reg (mode1, op1);
18641 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18642 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18649 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18652 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18656 tree arg0 = CALL_EXPR_ARG (exp, 0);
18657 tree arg1 = CALL_EXPR_ARG (exp, 1);
18658 rtx op0 = expand_normal (arg0);
18659 rtx op1 = expand_normal (arg1);
18660 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18661 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18662 enum rtx_code comparison = d->comparison;
18664 if (VECTOR_MODE_P (mode0))
18665 op0 = safe_vector_operand (op0, mode0);
18666 if (VECTOR_MODE_P (mode1))
18667 op1 = safe_vector_operand (op1, mode1);
18669 /* Swap operands if we have a comparison that isn't available in
18671 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18678 target = gen_reg_rtx (SImode);
18679 emit_move_insn (target, const0_rtx);
18680 target = gen_rtx_SUBREG (QImode, target, 0);
18682 if ((optimize && !register_operand (op0, mode0))
18683 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18684 op0 = copy_to_mode_reg (mode0, op0);
18685 if ((optimize && !register_operand (op1, mode1))
18686 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18687 op1 = copy_to_mode_reg (mode1, op1);
18689 pat = GEN_FCN (d->icode) (op0, op1);
18693 emit_insn (gen_rtx_SET (VOIDmode,
18694 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18695 gen_rtx_fmt_ee (comparison, QImode,
18699 return SUBREG_REG (target);
18702 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18705 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18709 tree arg0 = CALL_EXPR_ARG (exp, 0);
18710 tree arg1 = CALL_EXPR_ARG (exp, 1);
18711 rtx op0 = expand_normal (arg0);
18712 rtx op1 = expand_normal (arg1);
18713 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18714 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18715 enum rtx_code comparison = d->comparison;
18717 if (VECTOR_MODE_P (mode0))
18718 op0 = safe_vector_operand (op0, mode0);
18719 if (VECTOR_MODE_P (mode1))
18720 op1 = safe_vector_operand (op1, mode1);
18722 target = gen_reg_rtx (SImode);
18723 emit_move_insn (target, const0_rtx);
18724 target = gen_rtx_SUBREG (QImode, target, 0);
18726 if ((optimize && !register_operand (op0, mode0))
18727 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18728 op0 = copy_to_mode_reg (mode0, op0);
18729 if ((optimize && !register_operand (op1, mode1))
18730 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18731 op1 = copy_to_mode_reg (mode1, op1);
18733 pat = GEN_FCN (d->icode) (op0, op1);
18737 emit_insn (gen_rtx_SET (VOIDmode,
18738 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18739 gen_rtx_fmt_ee (comparison, QImode,
18743 return SUBREG_REG (target);
18746 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
18749 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18750 tree exp, rtx target)
18753 tree arg0 = CALL_EXPR_ARG (exp, 0);
18754 tree arg1 = CALL_EXPR_ARG (exp, 1);
18755 tree arg2 = CALL_EXPR_ARG (exp, 2);
18756 tree arg3 = CALL_EXPR_ARG (exp, 3);
18757 tree arg4 = CALL_EXPR_ARG (exp, 4);
18758 rtx scratch0, scratch1;
18759 rtx op0 = expand_normal (arg0);
18760 rtx op1 = expand_normal (arg1);
18761 rtx op2 = expand_normal (arg2);
18762 rtx op3 = expand_normal (arg3);
18763 rtx op4 = expand_normal (arg4);
18764 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
18766 tmode0 = insn_data[d->icode].operand[0].mode;
18767 tmode1 = insn_data[d->icode].operand[1].mode;
18768 modev2 = insn_data[d->icode].operand[2].mode;
18769 modei3 = insn_data[d->icode].operand[3].mode;
18770 modev4 = insn_data[d->icode].operand[4].mode;
18771 modei5 = insn_data[d->icode].operand[5].mode;
18772 modeimm = insn_data[d->icode].operand[6].mode;
18774 if (VECTOR_MODE_P (modev2))
18775 op0 = safe_vector_operand (op0, modev2);
18776 if (VECTOR_MODE_P (modev4))
18777 op2 = safe_vector_operand (op2, modev4);
18779 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18780 op0 = copy_to_mode_reg (modev2, op0);
18781 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18782 op1 = copy_to_mode_reg (modei3, op1);
18783 if ((optimize && !register_operand (op2, modev4))
18784 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18785 op2 = copy_to_mode_reg (modev4, op2);
18786 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18787 op3 = copy_to_mode_reg (modei5, op3);
18789 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18791 error ("the fifth argument must be a 8-bit immediate");
18795 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18797 if (optimize || !target
18798 || GET_MODE (target) != tmode0
18799 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18800 target = gen_reg_rtx (tmode0);
18802 scratch1 = gen_reg_rtx (tmode1);
18804 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
18806 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18808 if (optimize || !target
18809 || GET_MODE (target) != tmode1
18810 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18811 target = gen_reg_rtx (tmode1);
18813 scratch0 = gen_reg_rtx (tmode0);
18815 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
18819 gcc_assert (d->flag);
18821 scratch0 = gen_reg_rtx (tmode0);
18822 scratch1 = gen_reg_rtx (tmode1);
18824 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
18834 target = gen_reg_rtx (SImode);
18835 emit_move_insn (target, const0_rtx);
18836 target = gen_rtx_SUBREG (QImode, target, 0);
18839 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18840 gen_rtx_fmt_ee (EQ, QImode,
18841 gen_rtx_REG ((enum machine_mode) d->flag,
18844 return SUBREG_REG (target);
18851 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18854 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18855 tree exp, rtx target)
18858 tree arg0 = CALL_EXPR_ARG (exp, 0);
18859 tree arg1 = CALL_EXPR_ARG (exp, 1);
18860 tree arg2 = CALL_EXPR_ARG (exp, 2);
18861 rtx scratch0, scratch1;
18862 rtx op0 = expand_normal (arg0);
18863 rtx op1 = expand_normal (arg1);
18864 rtx op2 = expand_normal (arg2);
18865 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
18867 tmode0 = insn_data[d->icode].operand[0].mode;
18868 tmode1 = insn_data[d->icode].operand[1].mode;
18869 modev2 = insn_data[d->icode].operand[2].mode;
18870 modev3 = insn_data[d->icode].operand[3].mode;
18871 modeimm = insn_data[d->icode].operand[4].mode;
18873 if (VECTOR_MODE_P (modev2))
18874 op0 = safe_vector_operand (op0, modev2);
18875 if (VECTOR_MODE_P (modev3))
18876 op1 = safe_vector_operand (op1, modev3);
18878 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18879 op0 = copy_to_mode_reg (modev2, op0);
18880 if ((optimize && !register_operand (op1, modev3))
18881 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18882 op1 = copy_to_mode_reg (modev3, op1);
18884 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18886 error ("the third argument must be a 8-bit immediate");
18890 if (d->code == IX86_BUILTIN_PCMPISTRI128)
18892 if (optimize || !target
18893 || GET_MODE (target) != tmode0
18894 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18895 target = gen_reg_rtx (tmode0);
18897 scratch1 = gen_reg_rtx (tmode1);
18899 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
18901 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
18903 if (optimize || !target
18904 || GET_MODE (target) != tmode1
18905 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18906 target = gen_reg_rtx (tmode1);
18908 scratch0 = gen_reg_rtx (tmode0);
18910 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
18914 gcc_assert (d->flag);
18916 scratch0 = gen_reg_rtx (tmode0);
18917 scratch1 = gen_reg_rtx (tmode1);
18919 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
18929 target = gen_reg_rtx (SImode);
18930 emit_move_insn (target, const0_rtx);
18931 target = gen_rtx_SUBREG (QImode, target, 0);
18934 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18935 gen_rtx_fmt_ee (EQ, QImode,
18936 gen_rtx_REG ((enum machine_mode) d->flag,
18939 return SUBREG_REG (target);
18945 /* Return the integer constant in ARG. Constrain it to be in the range
18946 of the subparts of VEC_TYPE; issue an error if not. */
18949 get_element_number (tree vec_type, tree arg)
18951 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18953 if (!host_integerp (arg, 1)
18954 || (elt = tree_low_cst (arg, 1), elt > max))
18956 error ("selector must be an integer constant in the range 0..%wi", max);
18963 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18964 ix86_expand_vector_init. We DO have language-level syntax for this, in
18965 the form of (type){ init-list }. Except that since we can't place emms
18966 instructions from inside the compiler, we can't allow the use of MMX
18967 registers unless the user explicitly asks for it. So we do *not* define
18968 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18969 we have builtins invoked by mmintrin.h that gives us license to emit
18970 these sorts of instructions. */
18973 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18975 enum machine_mode tmode = TYPE_MODE (type);
18976 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18977 int i, n_elt = GET_MODE_NUNITS (tmode);
18978 rtvec v = rtvec_alloc (n_elt);
18980 gcc_assert (VECTOR_MODE_P (tmode));
18981 gcc_assert (call_expr_nargs (exp) == n_elt);
18983 for (i = 0; i < n_elt; ++i)
18985 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
18986 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
18989 if (!target || !register_operand (target, tmode))
18990 target = gen_reg_rtx (tmode);
18992 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
18996 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18997 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
18998 had a language-level syntax for referencing vector elements. */
19001 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19003 enum machine_mode tmode, mode0;
19008 arg0 = CALL_EXPR_ARG (exp, 0);
19009 arg1 = CALL_EXPR_ARG (exp, 1);
19011 op0 = expand_normal (arg0);
19012 elt = get_element_number (TREE_TYPE (arg0), arg1);
19014 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19015 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19016 gcc_assert (VECTOR_MODE_P (mode0));
19018 op0 = force_reg (mode0, op0);
19020 if (optimize || !target || !register_operand (target, tmode))
19021 target = gen_reg_rtx (tmode);
19023 ix86_expand_vector_extract (true, target, op0, elt);
19028 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19029 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19030 a language-level syntax for referencing vector elements. */
19033 ix86_expand_vec_set_builtin (tree exp)
19035 enum machine_mode tmode, mode1;
19036 tree arg0, arg1, arg2;
19038 rtx op0, op1, target;
19040 arg0 = CALL_EXPR_ARG (exp, 0);
19041 arg1 = CALL_EXPR_ARG (exp, 1);
19042 arg2 = CALL_EXPR_ARG (exp, 2);
19044 tmode = TYPE_MODE (TREE_TYPE (arg0));
19045 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19046 gcc_assert (VECTOR_MODE_P (tmode));
19048 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19049 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19050 elt = get_element_number (TREE_TYPE (arg0), arg2);
19052 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19053 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19055 op0 = force_reg (tmode, op0);
19056 op1 = force_reg (mode1, op1);
19058 /* OP0 is the source of these builtin functions and shouldn't be
19059 modified. Create a copy, use it and return it as target. */
19060 target = gen_reg_rtx (tmode);
19061 emit_move_insn (target, op0);
19062 ix86_expand_vector_set (true, target, op1, elt);
19067 /* Expand an expression EXP that calls a built-in function,
19068 with result going to TARGET if that's convenient
19069 (and in mode MODE if that's convenient).
19070 SUBTARGET may be used as the target for computing one of EXP's operands.
19071 IGNORE is nonzero if the value is to be ignored. */
19074 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19075 enum machine_mode mode ATTRIBUTE_UNUSED,
19076 int ignore ATTRIBUTE_UNUSED)
19078 const struct builtin_description *d;
19080 enum insn_code icode;
19081 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19082 tree arg0, arg1, arg2, arg3;
19083 rtx op0, op1, op2, op3, pat;
19084 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19085 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19089 case IX86_BUILTIN_EMMS:
19090 emit_insn (gen_mmx_emms ());
19093 case IX86_BUILTIN_SFENCE:
19094 emit_insn (gen_sse_sfence ());
19097 case IX86_BUILTIN_MASKMOVQ:
19098 case IX86_BUILTIN_MASKMOVDQU:
19099 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19100 ? CODE_FOR_mmx_maskmovq
19101 : CODE_FOR_sse2_maskmovdqu);
19102 /* Note the arg order is different from the operand order. */
19103 arg1 = CALL_EXPR_ARG (exp, 0);
19104 arg2 = CALL_EXPR_ARG (exp, 1);
19105 arg0 = CALL_EXPR_ARG (exp, 2);
19106 op0 = expand_normal (arg0);
19107 op1 = expand_normal (arg1);
19108 op2 = expand_normal (arg2);
19109 mode0 = insn_data[icode].operand[0].mode;
19110 mode1 = insn_data[icode].operand[1].mode;
19111 mode2 = insn_data[icode].operand[2].mode;
19113 op0 = force_reg (Pmode, op0);
19114 op0 = gen_rtx_MEM (mode1, op0);
19116 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19117 op0 = copy_to_mode_reg (mode0, op0);
19118 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19119 op1 = copy_to_mode_reg (mode1, op1);
19120 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19121 op2 = copy_to_mode_reg (mode2, op2);
19122 pat = GEN_FCN (icode) (op0, op1, op2);
19128 case IX86_BUILTIN_SQRTSS:
19129 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19130 case IX86_BUILTIN_RSQRTSS:
19131 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19132 case IX86_BUILTIN_RCPSS:
19133 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19135 case IX86_BUILTIN_LOADUPS:
19136 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19138 case IX86_BUILTIN_STOREUPS:
19139 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19141 case IX86_BUILTIN_LOADHPS:
19142 case IX86_BUILTIN_LOADLPS:
19143 case IX86_BUILTIN_LOADHPD:
19144 case IX86_BUILTIN_LOADLPD:
19145 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19146 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19147 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19148 : CODE_FOR_sse2_loadlpd);
19149 arg0 = CALL_EXPR_ARG (exp, 0);
19150 arg1 = CALL_EXPR_ARG (exp, 1);
19151 op0 = expand_normal (arg0);
19152 op1 = expand_normal (arg1);
19153 tmode = insn_data[icode].operand[0].mode;
19154 mode0 = insn_data[icode].operand[1].mode;
19155 mode1 = insn_data[icode].operand[2].mode;
19157 op0 = force_reg (mode0, op0);
19158 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19159 if (optimize || target == 0
19160 || GET_MODE (target) != tmode
19161 || !register_operand (target, tmode))
19162 target = gen_reg_rtx (tmode);
19163 pat = GEN_FCN (icode) (target, op0, op1);
19169 case IX86_BUILTIN_STOREHPS:
19170 case IX86_BUILTIN_STORELPS:
19171 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19172 : CODE_FOR_sse_storelps);
19173 arg0 = CALL_EXPR_ARG (exp, 0);
19174 arg1 = CALL_EXPR_ARG (exp, 1);
19175 op0 = expand_normal (arg0);
19176 op1 = expand_normal (arg1);
19177 mode0 = insn_data[icode].operand[0].mode;
19178 mode1 = insn_data[icode].operand[1].mode;
19180 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19181 op1 = force_reg (mode1, op1);
19183 pat = GEN_FCN (icode) (op0, op1);
19189 case IX86_BUILTIN_MOVNTPS:
19190 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19191 case IX86_BUILTIN_MOVNTQ:
19192 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19194 case IX86_BUILTIN_LDMXCSR:
19195 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19196 target = assign_386_stack_local (SImode, SLOT_TEMP);
19197 emit_move_insn (target, op0);
19198 emit_insn (gen_sse_ldmxcsr (target));
19201 case IX86_BUILTIN_STMXCSR:
19202 target = assign_386_stack_local (SImode, SLOT_TEMP);
19203 emit_insn (gen_sse_stmxcsr (target));
19204 return copy_to_mode_reg (SImode, target);
19206 case IX86_BUILTIN_SHUFPS:
19207 case IX86_BUILTIN_SHUFPD:
19208 icode = (fcode == IX86_BUILTIN_SHUFPS
19209 ? CODE_FOR_sse_shufps
19210 : CODE_FOR_sse2_shufpd);
19211 arg0 = CALL_EXPR_ARG (exp, 0);
19212 arg1 = CALL_EXPR_ARG (exp, 1);
19213 arg2 = CALL_EXPR_ARG (exp, 2);
19214 op0 = expand_normal (arg0);
19215 op1 = expand_normal (arg1);
19216 op2 = expand_normal (arg2);
19217 tmode = insn_data[icode].operand[0].mode;
19218 mode0 = insn_data[icode].operand[1].mode;
19219 mode1 = insn_data[icode].operand[2].mode;
19220 mode2 = insn_data[icode].operand[3].mode;
19222 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19223 op0 = copy_to_mode_reg (mode0, op0);
19224 if ((optimize && !register_operand (op1, mode1))
19225 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19226 op1 = copy_to_mode_reg (mode1, op1);
19227 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19229 /* @@@ better error message */
19230 error ("mask must be an immediate");
19231 return gen_reg_rtx (tmode);
19233 if (optimize || target == 0
19234 || GET_MODE (target) != tmode
19235 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19236 target = gen_reg_rtx (tmode);
19237 pat = GEN_FCN (icode) (target, op0, op1, op2);
19243 case IX86_BUILTIN_PSHUFW:
19244 case IX86_BUILTIN_PSHUFD:
19245 case IX86_BUILTIN_PSHUFHW:
19246 case IX86_BUILTIN_PSHUFLW:
19247 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19248 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19249 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19250 : CODE_FOR_mmx_pshufw);
19251 arg0 = CALL_EXPR_ARG (exp, 0);
19252 arg1 = CALL_EXPR_ARG (exp, 1);
19253 op0 = expand_normal (arg0);
19254 op1 = expand_normal (arg1);
19255 tmode = insn_data[icode].operand[0].mode;
19256 mode1 = insn_data[icode].operand[1].mode;
19257 mode2 = insn_data[icode].operand[2].mode;
19259 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19260 op0 = copy_to_mode_reg (mode1, op0);
19261 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19263 /* @@@ better error message */
19264 error ("mask must be an immediate");
19268 || GET_MODE (target) != tmode
19269 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19270 target = gen_reg_rtx (tmode);
19271 pat = GEN_FCN (icode) (target, op0, op1);
19277 case IX86_BUILTIN_PSLLWI128:
19278 icode = CODE_FOR_ashlv8hi3;
19280 case IX86_BUILTIN_PSLLDI128:
19281 icode = CODE_FOR_ashlv4si3;
19283 case IX86_BUILTIN_PSLLQI128:
19284 icode = CODE_FOR_ashlv2di3;
19286 case IX86_BUILTIN_PSRAWI128:
19287 icode = CODE_FOR_ashrv8hi3;
19289 case IX86_BUILTIN_PSRADI128:
19290 icode = CODE_FOR_ashrv4si3;
19292 case IX86_BUILTIN_PSRLWI128:
19293 icode = CODE_FOR_lshrv8hi3;
19295 case IX86_BUILTIN_PSRLDI128:
19296 icode = CODE_FOR_lshrv4si3;
19298 case IX86_BUILTIN_PSRLQI128:
19299 icode = CODE_FOR_lshrv2di3;
19302 arg0 = CALL_EXPR_ARG (exp, 0);
19303 arg1 = CALL_EXPR_ARG (exp, 1);
19304 op0 = expand_normal (arg0);
19305 op1 = expand_normal (arg1);
19307 if (!CONST_INT_P (op1))
19309 error ("shift must be an immediate");
19312 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19313 op1 = GEN_INT (255);
19315 tmode = insn_data[icode].operand[0].mode;
19316 mode1 = insn_data[icode].operand[1].mode;
19317 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19318 op0 = copy_to_reg (op0);
19320 target = gen_reg_rtx (tmode);
19321 pat = GEN_FCN (icode) (target, op0, op1);
19327 case IX86_BUILTIN_PSLLW128:
19328 icode = CODE_FOR_ashlv8hi3;
19330 case IX86_BUILTIN_PSLLD128:
19331 icode = CODE_FOR_ashlv4si3;
19333 case IX86_BUILTIN_PSLLQ128:
19334 icode = CODE_FOR_ashlv2di3;
19336 case IX86_BUILTIN_PSRAW128:
19337 icode = CODE_FOR_ashrv8hi3;
19339 case IX86_BUILTIN_PSRAD128:
19340 icode = CODE_FOR_ashrv4si3;
19342 case IX86_BUILTIN_PSRLW128:
19343 icode = CODE_FOR_lshrv8hi3;
19345 case IX86_BUILTIN_PSRLD128:
19346 icode = CODE_FOR_lshrv4si3;
19348 case IX86_BUILTIN_PSRLQ128:
19349 icode = CODE_FOR_lshrv2di3;
19352 arg0 = CALL_EXPR_ARG (exp, 0);
19353 arg1 = CALL_EXPR_ARG (exp, 1);
19354 op0 = expand_normal (arg0);
19355 op1 = expand_normal (arg1);
19357 tmode = insn_data[icode].operand[0].mode;
19358 mode1 = insn_data[icode].operand[1].mode;
19360 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19361 op0 = copy_to_reg (op0);
19363 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
19364 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
19365 op1 = copy_to_reg (op1);
19367 target = gen_reg_rtx (tmode);
19368 pat = GEN_FCN (icode) (target, op0, op1);
19374 case IX86_BUILTIN_PSLLDQI128:
19375 case IX86_BUILTIN_PSRLDQI128:
19376 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19377 : CODE_FOR_sse2_lshrti3);
19378 arg0 = CALL_EXPR_ARG (exp, 0);
19379 arg1 = CALL_EXPR_ARG (exp, 1);
19380 op0 = expand_normal (arg0);
19381 op1 = expand_normal (arg1);
19382 tmode = insn_data[icode].operand[0].mode;
19383 mode1 = insn_data[icode].operand[1].mode;
19384 mode2 = insn_data[icode].operand[2].mode;
19386 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19388 op0 = copy_to_reg (op0);
19389 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19391 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19393 error ("shift must be an immediate");
19396 target = gen_reg_rtx (V2DImode);
19397 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19404 case IX86_BUILTIN_FEMMS:
19405 emit_insn (gen_mmx_femms ());
19408 case IX86_BUILTIN_PAVGUSB:
19409 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19411 case IX86_BUILTIN_PF2ID:
19412 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19414 case IX86_BUILTIN_PFACC:
19415 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19417 case IX86_BUILTIN_PFADD:
19418 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19420 case IX86_BUILTIN_PFCMPEQ:
19421 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19423 case IX86_BUILTIN_PFCMPGE:
19424 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19426 case IX86_BUILTIN_PFCMPGT:
19427 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19429 case IX86_BUILTIN_PFMAX:
19430 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19432 case IX86_BUILTIN_PFMIN:
19433 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19435 case IX86_BUILTIN_PFMUL:
19436 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19438 case IX86_BUILTIN_PFRCP:
19439 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19441 case IX86_BUILTIN_PFRCPIT1:
19442 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19444 case IX86_BUILTIN_PFRCPIT2:
19445 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19447 case IX86_BUILTIN_PFRSQIT1:
19448 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19450 case IX86_BUILTIN_PFRSQRT:
19451 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19453 case IX86_BUILTIN_PFSUB:
19454 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19456 case IX86_BUILTIN_PFSUBR:
19457 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19459 case IX86_BUILTIN_PI2FD:
19460 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19462 case IX86_BUILTIN_PMULHRW:
19463 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19465 case IX86_BUILTIN_PF2IW:
19466 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19468 case IX86_BUILTIN_PFNACC:
19469 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19471 case IX86_BUILTIN_PFPNACC:
19472 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19474 case IX86_BUILTIN_PI2FW:
19475 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19477 case IX86_BUILTIN_PSWAPDSI:
19478 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19480 case IX86_BUILTIN_PSWAPDSF:
19481 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19483 case IX86_BUILTIN_SQRTSD:
19484 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19485 case IX86_BUILTIN_LOADUPD:
19486 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19487 case IX86_BUILTIN_STOREUPD:
19488 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19490 case IX86_BUILTIN_MFENCE:
19491 emit_insn (gen_sse2_mfence ());
19493 case IX86_BUILTIN_LFENCE:
19494 emit_insn (gen_sse2_lfence ());
19497 case IX86_BUILTIN_CLFLUSH:
19498 arg0 = CALL_EXPR_ARG (exp, 0);
19499 op0 = expand_normal (arg0);
19500 icode = CODE_FOR_sse2_clflush;
19501 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19502 op0 = copy_to_mode_reg (Pmode, op0);
19504 emit_insn (gen_sse2_clflush (op0));
19507 case IX86_BUILTIN_MOVNTPD:
19508 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19509 case IX86_BUILTIN_MOVNTDQ:
19510 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19511 case IX86_BUILTIN_MOVNTI:
19512 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19514 case IX86_BUILTIN_LOADDQU:
19515 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19516 case IX86_BUILTIN_STOREDQU:
19517 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19519 case IX86_BUILTIN_MONITOR:
19520 arg0 = CALL_EXPR_ARG (exp, 0);
19521 arg1 = CALL_EXPR_ARG (exp, 1);
19522 arg2 = CALL_EXPR_ARG (exp, 2);
19523 op0 = expand_normal (arg0);
19524 op1 = expand_normal (arg1);
19525 op2 = expand_normal (arg2);
19527 op0 = copy_to_mode_reg (Pmode, op0);
19529 op1 = copy_to_mode_reg (SImode, op1);
19531 op2 = copy_to_mode_reg (SImode, op2);
19533 emit_insn (gen_sse3_monitor (op0, op1, op2));
19535 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19538 case IX86_BUILTIN_MWAIT:
19539 arg0 = CALL_EXPR_ARG (exp, 0);
19540 arg1 = CALL_EXPR_ARG (exp, 1);
19541 op0 = expand_normal (arg0);
19542 op1 = expand_normal (arg1);
19544 op0 = copy_to_mode_reg (SImode, op0);
19546 op1 = copy_to_mode_reg (SImode, op1);
19547 emit_insn (gen_sse3_mwait (op0, op1));
19550 case IX86_BUILTIN_LDDQU:
19551 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19554 case IX86_BUILTIN_PALIGNR:
19555 case IX86_BUILTIN_PALIGNR128:
19556 if (fcode == IX86_BUILTIN_PALIGNR)
19558 icode = CODE_FOR_ssse3_palignrdi;
19563 icode = CODE_FOR_ssse3_palignrti;
19566 arg0 = CALL_EXPR_ARG (exp, 0);
19567 arg1 = CALL_EXPR_ARG (exp, 1);
19568 arg2 = CALL_EXPR_ARG (exp, 2);
19569 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19570 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19571 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19572 tmode = insn_data[icode].operand[0].mode;
19573 mode1 = insn_data[icode].operand[1].mode;
19574 mode2 = insn_data[icode].operand[2].mode;
19575 mode3 = insn_data[icode].operand[3].mode;
19577 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19579 op0 = copy_to_reg (op0);
19580 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19582 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19584 op1 = copy_to_reg (op1);
19585 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19587 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19589 error ("shift must be an immediate");
19592 target = gen_reg_rtx (mode);
19593 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19600 case IX86_BUILTIN_MOVNTDQA:
19601 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19604 case IX86_BUILTIN_MOVNTSD:
19605 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19607 case IX86_BUILTIN_MOVNTSS:
19608 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19610 case IX86_BUILTIN_INSERTQ:
19611 case IX86_BUILTIN_EXTRQ:
19612 icode = (fcode == IX86_BUILTIN_EXTRQ
19613 ? CODE_FOR_sse4a_extrq
19614 : CODE_FOR_sse4a_insertq);
19615 arg0 = CALL_EXPR_ARG (exp, 0);
19616 arg1 = CALL_EXPR_ARG (exp, 1);
19617 op0 = expand_normal (arg0);
19618 op1 = expand_normal (arg1);
19619 tmode = insn_data[icode].operand[0].mode;
19620 mode1 = insn_data[icode].operand[1].mode;
19621 mode2 = insn_data[icode].operand[2].mode;
19622 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19623 op0 = copy_to_mode_reg (mode1, op0);
19624 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19625 op1 = copy_to_mode_reg (mode2, op1);
19626 if (optimize || target == 0
19627 || GET_MODE (target) != tmode
19628 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19629 target = gen_reg_rtx (tmode);
19630 pat = GEN_FCN (icode) (target, op0, op1);
19636 case IX86_BUILTIN_EXTRQI:
19637 icode = CODE_FOR_sse4a_extrqi;
19638 arg0 = CALL_EXPR_ARG (exp, 0);
19639 arg1 = CALL_EXPR_ARG (exp, 1);
19640 arg2 = CALL_EXPR_ARG (exp, 2);
19641 op0 = expand_normal (arg0);
19642 op1 = expand_normal (arg1);
19643 op2 = expand_normal (arg2);
19644 tmode = insn_data[icode].operand[0].mode;
19645 mode1 = insn_data[icode].operand[1].mode;
19646 mode2 = insn_data[icode].operand[2].mode;
19647 mode3 = insn_data[icode].operand[3].mode;
19648 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19649 op0 = copy_to_mode_reg (mode1, op0);
19650 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19652 error ("index mask must be an immediate");
19653 return gen_reg_rtx (tmode);
19655 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19657 error ("length mask must be an immediate");
19658 return gen_reg_rtx (tmode);
19660 if (optimize || target == 0
19661 || GET_MODE (target) != tmode
19662 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19663 target = gen_reg_rtx (tmode);
19664 pat = GEN_FCN (icode) (target, op0, op1, op2);
19670 case IX86_BUILTIN_INSERTQI:
19671 icode = CODE_FOR_sse4a_insertqi;
19672 arg0 = CALL_EXPR_ARG (exp, 0);
19673 arg1 = CALL_EXPR_ARG (exp, 1);
19674 arg2 = CALL_EXPR_ARG (exp, 2);
19675 arg3 = CALL_EXPR_ARG (exp, 3);
19676 op0 = expand_normal (arg0);
19677 op1 = expand_normal (arg1);
19678 op2 = expand_normal (arg2);
19679 op3 = expand_normal (arg3);
19680 tmode = insn_data[icode].operand[0].mode;
19681 mode1 = insn_data[icode].operand[1].mode;
19682 mode2 = insn_data[icode].operand[2].mode;
19683 mode3 = insn_data[icode].operand[3].mode;
19684 mode4 = insn_data[icode].operand[4].mode;
19686 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19687 op0 = copy_to_mode_reg (mode1, op0);
19689 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19690 op1 = copy_to_mode_reg (mode2, op1);
19692 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19694 error ("index mask must be an immediate");
19695 return gen_reg_rtx (tmode);
19697 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19699 error ("length mask must be an immediate");
19700 return gen_reg_rtx (tmode);
19702 if (optimize || target == 0
19703 || GET_MODE (target) != tmode
19704 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19705 target = gen_reg_rtx (tmode);
19706 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19712 case IX86_BUILTIN_VEC_INIT_V2SI:
19713 case IX86_BUILTIN_VEC_INIT_V4HI:
19714 case IX86_BUILTIN_VEC_INIT_V8QI:
19715 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19717 case IX86_BUILTIN_VEC_EXT_V2DF:
19718 case IX86_BUILTIN_VEC_EXT_V2DI:
19719 case IX86_BUILTIN_VEC_EXT_V4SF:
19720 case IX86_BUILTIN_VEC_EXT_V4SI:
19721 case IX86_BUILTIN_VEC_EXT_V8HI:
19722 case IX86_BUILTIN_VEC_EXT_V2SI:
19723 case IX86_BUILTIN_VEC_EXT_V4HI:
19724 case IX86_BUILTIN_VEC_EXT_V16QI:
19725 return ix86_expand_vec_ext_builtin (exp, target);
19727 case IX86_BUILTIN_VEC_SET_V2DI:
19728 case IX86_BUILTIN_VEC_SET_V4SF:
19729 case IX86_BUILTIN_VEC_SET_V4SI:
19730 case IX86_BUILTIN_VEC_SET_V8HI:
19731 case IX86_BUILTIN_VEC_SET_V4HI:
19732 case IX86_BUILTIN_VEC_SET_V16QI:
19733 return ix86_expand_vec_set_builtin (exp);
19735 case IX86_BUILTIN_INFQ:
19737 REAL_VALUE_TYPE inf;
19741 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
19743 tmp = validize_mem (force_const_mem (mode, tmp));
19746 target = gen_reg_rtx (mode);
19748 emit_move_insn (target, tmp);
19752 case IX86_BUILTIN_FABSQ:
19753 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
19755 case IX86_BUILTIN_COPYSIGNQ:
19756 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
19762 for (i = 0, d = bdesc_sse_3arg;
19763 i < ARRAY_SIZE (bdesc_sse_3arg);
19765 if (d->code == fcode)
19766 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19769 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19770 if (d->code == fcode)
19772 /* Compares are treated specially. */
19773 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19774 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19775 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19776 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19777 return ix86_expand_sse_compare (d, exp, target);
19779 return ix86_expand_binop_builtin (d->icode, exp, target);
19782 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19783 if (d->code == fcode)
19784 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19786 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19787 if (d->code == fcode)
19788 return ix86_expand_sse_comi (d, exp, target);
19790 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19791 if (d->code == fcode)
19792 return ix86_expand_sse_ptest (d, exp, target);
19794 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19795 if (d->code == fcode)
19796 return ix86_expand_crc32 (d->icode, exp, target);
19798 for (i = 0, d = bdesc_pcmpestr;
19799 i < ARRAY_SIZE (bdesc_pcmpestr);
19801 if (d->code == fcode)
19802 return ix86_expand_sse_pcmpestr (d, exp, target);
19804 for (i = 0, d = bdesc_pcmpistr;
19805 i < ARRAY_SIZE (bdesc_pcmpistr);
19807 if (d->code == fcode)
19808 return ix86_expand_sse_pcmpistr (d, exp, target);
19810 gcc_unreachable ();
19813 /* Returns a function decl for a vectorized version of the builtin function
19814 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19815 if it is not available. */
19818 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19821 enum machine_mode in_mode, out_mode;
19824 if (TREE_CODE (type_out) != VECTOR_TYPE
19825 || TREE_CODE (type_in) != VECTOR_TYPE)
19828 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19829 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19830 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19831 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19835 case BUILT_IN_SQRT:
19836 if (out_mode == DFmode && out_n == 2
19837 && in_mode == DFmode && in_n == 2)
19838 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19841 case BUILT_IN_SQRTF:
19842 if (out_mode == SFmode && out_n == 4
19843 && in_mode == SFmode && in_n == 4)
19844 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19847 case BUILT_IN_LRINTF:
19848 if (out_mode == SImode && out_n == 4
19849 && in_mode == SFmode && in_n == 4)
19850 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19860 /* Returns a decl of a function that implements conversion of the
19861 input vector of type TYPE, or NULL_TREE if it is not available. */
19864 ix86_builtin_conversion (unsigned int code, tree type)
19866 if (TREE_CODE (type) != VECTOR_TYPE)
19872 switch (TYPE_MODE (type))
19875 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19880 case FIX_TRUNC_EXPR:
19881 switch (TYPE_MODE (type))
19884 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19894 /* Store OPERAND to the memory after reload is completed. This means
19895 that we can't easily use assign_stack_local. */
19897 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19901 gcc_assert (reload_completed);
19902 if (TARGET_RED_ZONE)
19904 result = gen_rtx_MEM (mode,
19905 gen_rtx_PLUS (Pmode,
19907 GEN_INT (-RED_ZONE_SIZE)));
19908 emit_move_insn (result, operand);
19910 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19916 operand = gen_lowpart (DImode, operand);
19920 gen_rtx_SET (VOIDmode,
19921 gen_rtx_MEM (DImode,
19922 gen_rtx_PRE_DEC (DImode,
19923 stack_pointer_rtx)),
19927 gcc_unreachable ();
19929 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19938 split_di (&operand, 1, operands, operands + 1);
19940 gen_rtx_SET (VOIDmode,
19941 gen_rtx_MEM (SImode,
19942 gen_rtx_PRE_DEC (Pmode,
19943 stack_pointer_rtx)),
19946 gen_rtx_SET (VOIDmode,
19947 gen_rtx_MEM (SImode,
19948 gen_rtx_PRE_DEC (Pmode,
19949 stack_pointer_rtx)),
19954 /* Store HImodes as SImodes. */
19955 operand = gen_lowpart (SImode, operand);
19959 gen_rtx_SET (VOIDmode,
19960 gen_rtx_MEM (GET_MODE (operand),
19961 gen_rtx_PRE_DEC (SImode,
19962 stack_pointer_rtx)),
19966 gcc_unreachable ();
19968 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19973 /* Free operand from the memory. */
19975 ix86_free_from_memory (enum machine_mode mode)
19977 if (!TARGET_RED_ZONE)
19981 if (mode == DImode || TARGET_64BIT)
19985 /* Use LEA to deallocate stack space. In peephole2 it will be converted
19986 to pop or add instruction if registers are available. */
19987 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19988 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
19993 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
19994 QImode must go into class Q_REGS.
19995 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19996 movdf to do mem-to-mem moves through integer regs. */
19998 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20000 enum machine_mode mode = GET_MODE (x);
20002 /* We're only allowed to return a subclass of CLASS. Many of the
20003 following checks fail for NO_REGS, so eliminate that early. */
20004 if (regclass == NO_REGS)
20007 /* All classes can load zeros. */
20008 if (x == CONST0_RTX (mode))
20011 /* Force constants into memory if we are loading a (nonzero) constant into
20012 an MMX or SSE register. This is because there are no MMX/SSE instructions
20013 to load from a constant. */
20015 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20018 /* Prefer SSE regs only, if we can use them for math. */
20019 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20020 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20022 /* Floating-point constants need more complex checks. */
20023 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20025 /* General regs can load everything. */
20026 if (reg_class_subset_p (regclass, GENERAL_REGS))
20029 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20030 zero above. We only want to wind up preferring 80387 registers if
20031 we plan on doing computation with them. */
20033 && standard_80387_constant_p (x))
20035 /* Limit class to non-sse. */
20036 if (regclass == FLOAT_SSE_REGS)
20038 if (regclass == FP_TOP_SSE_REGS)
20040 if (regclass == FP_SECOND_SSE_REGS)
20041 return FP_SECOND_REG;
20042 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20049 /* Generally when we see PLUS here, it's the function invariant
20050 (plus soft-fp const_int). Which can only be computed into general
20052 if (GET_CODE (x) == PLUS)
20053 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20055 /* QImode constants are easy to load, but non-constant QImode data
20056 must go into Q_REGS. */
20057 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20059 if (reg_class_subset_p (regclass, Q_REGS))
20061 if (reg_class_subset_p (Q_REGS, regclass))
20069 /* Discourage putting floating-point values in SSE registers unless
20070 SSE math is being used, and likewise for the 387 registers. */
20072 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20074 enum machine_mode mode = GET_MODE (x);
20076 /* Restrict the output reload class to the register bank that we are doing
20077 math on. If we would like not to return a subset of CLASS, reject this
20078 alternative: if reload cannot do this, it will still use its choice. */
20079 mode = GET_MODE (x);
20080 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20081 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
20083 if (X87_FLOAT_MODE_P (mode))
20085 if (regclass == FP_TOP_SSE_REGS)
20087 else if (regclass == FP_SECOND_SSE_REGS)
20088 return FP_SECOND_REG;
20090 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20096 /* If we are copying between general and FP registers, we need a memory
20097 location. The same is true for SSE and MMX registers.
20099 The macro can't work reliably when one of the CLASSES is class containing
20100 registers from multiple units (SSE, MMX, integer). We avoid this by never
20101 combining those units in single alternative in the machine description.
20102 Ensure that this constraint holds to avoid unexpected surprises.
20104 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20105 enforce these sanity checks. */
20108 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20109 enum machine_mode mode, int strict)
20111 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20112 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20113 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20114 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20115 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20116 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20118 gcc_assert (!strict);
20122 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20125 /* ??? This is a lie. We do have moves between mmx/general, and for
20126 mmx/sse2. But by saying we need secondary memory we discourage the
20127 register allocator from using the mmx registers unless needed. */
20128 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20131 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20133 /* SSE1 doesn't have any direct moves from other classes. */
20137 /* If the target says that inter-unit moves are more expensive
20138 than moving through memory, then don't generate them. */
20139 if (!TARGET_INTER_UNIT_MOVES)
20142 /* Between SSE and general, we have moves no larger than word size. */
20143 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20150 /* Return true if the registers in CLASS cannot represent the change from
20151 modes FROM to TO. */
20154 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20155 enum reg_class regclass)
20160 /* x87 registers can't do subreg at all, as all values are reformatted
20161 to extended precision. */
20162 if (MAYBE_FLOAT_CLASS_P (regclass))
20165 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20167 /* Vector registers do not support QI or HImode loads. If we don't
20168 disallow a change to these modes, reload will assume it's ok to
20169 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20170 the vec_dupv4hi pattern. */
20171 if (GET_MODE_SIZE (from) < 4)
20174 /* Vector registers do not support subreg with nonzero offsets, which
20175 are otherwise valid for integer registers. Since we can't see
20176 whether we have a nonzero offset from here, prohibit all
20177 nonparadoxical subregs changing size. */
20178 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20185 /* Return the cost of moving data from a register in class CLASS1 to
20186 one in class CLASS2.
20188 It is not required that the cost always equal 2 when FROM is the same as TO;
20189 on some machines it is expensive to move between registers if they are not
20190 general registers. */
20193 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20194 enum reg_class class2)
20196 /* In case we require secondary memory, compute cost of the store followed
20197 by load. In order to avoid bad register allocation choices, we need
20198 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20200 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
20204 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
20205 MEMORY_MOVE_COST (mode, class1, 1));
20206 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
20207 MEMORY_MOVE_COST (mode, class2, 1));
20209 /* In case of copying from general_purpose_register we may emit multiple
20210 stores followed by single load causing memory size mismatch stall.
20211 Count this as arbitrarily high cost of 20. */
20212 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20215 /* In the case of FP/MMX moves, the registers actually overlap, and we
20216 have to switch modes in order to treat them differently. */
20217 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20218 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20224 /* Moves between SSE/MMX and integer unit are expensive. */
20225 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20226 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20227 return ix86_cost->mmxsse_to_integer;
20228 if (MAYBE_FLOAT_CLASS_P (class1))
20229 return ix86_cost->fp_move;
20230 if (MAYBE_SSE_CLASS_P (class1))
20231 return ix86_cost->sse_move;
20232 if (MAYBE_MMX_CLASS_P (class1))
20233 return ix86_cost->mmx_move;
20237 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20240 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20242 /* Flags and only flags can only hold CCmode values. */
20243 if (CC_REGNO_P (regno))
20244 return GET_MODE_CLASS (mode) == MODE_CC;
20245 if (GET_MODE_CLASS (mode) == MODE_CC
20246 || GET_MODE_CLASS (mode) == MODE_RANDOM
20247 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20249 if (FP_REGNO_P (regno))
20250 return VALID_FP_MODE_P (mode);
20251 if (SSE_REGNO_P (regno))
20253 /* We implement the move patterns for all vector modes into and
20254 out of SSE registers, even when no operation instructions
20256 return (VALID_SSE_REG_MODE (mode)
20257 || VALID_SSE2_REG_MODE (mode)
20258 || VALID_MMX_REG_MODE (mode)
20259 || VALID_MMX_REG_MODE_3DNOW (mode));
20261 if (MMX_REGNO_P (regno))
20263 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20264 so if the register is available at all, then we can move data of
20265 the given mode into or out of it. */
20266 return (VALID_MMX_REG_MODE (mode)
20267 || VALID_MMX_REG_MODE_3DNOW (mode));
20270 if (mode == QImode)
20272 /* Take care for QImode values - they can be in non-QI regs,
20273 but then they do cause partial register stalls. */
20274 if (regno < 4 || TARGET_64BIT)
20276 if (!TARGET_PARTIAL_REG_STALL)
20278 return reload_in_progress || reload_completed;
20280 /* We handle both integer and floats in the general purpose registers. */
20281 else if (VALID_INT_MODE_P (mode))
20283 else if (VALID_FP_MODE_P (mode))
20285 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20286 on to use that value in smaller contexts, this can easily force a
20287 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20288 supporting DImode, allow it. */
20289 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20295 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20296 tieable integer mode. */
20299 ix86_tieable_integer_mode_p (enum machine_mode mode)
20308 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20311 return TARGET_64BIT;
20318 /* Return true if MODE1 is accessible in a register that can hold MODE2
20319 without copying. That is, all register classes that can hold MODE2
20320 can also hold MODE1. */
20323 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20325 if (mode1 == mode2)
20328 if (ix86_tieable_integer_mode_p (mode1)
20329 && ix86_tieable_integer_mode_p (mode2))
20332 /* MODE2 being XFmode implies fp stack or general regs, which means we
20333 can tie any smaller floating point modes to it. Note that we do not
20334 tie this with TFmode. */
20335 if (mode2 == XFmode)
20336 return mode1 == SFmode || mode1 == DFmode;
20338 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20339 that we can tie it with SFmode. */
20340 if (mode2 == DFmode)
20341 return mode1 == SFmode;
20343 /* If MODE2 is only appropriate for an SSE register, then tie with
20344 any other mode acceptable to SSE registers. */
20345 if (GET_MODE_SIZE (mode2) == 16
20346 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20347 return (GET_MODE_SIZE (mode1) == 16
20348 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20350 /* If MODE2 is appropriate for an MMX register, then tie
20351 with any other mode acceptable to MMX registers. */
20352 if (GET_MODE_SIZE (mode2) == 8
20353 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20354 return (GET_MODE_SIZE (mode1) == 8
20355 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20360 /* Return the cost of moving data of mode M between a
20361 register and memory. A value of 2 is the default; this cost is
20362 relative to those in `REGISTER_MOVE_COST'.
20364 If moving between registers and memory is more expensive than
20365 between two registers, you should define this macro to express the
20368 Model also increased moving costs of QImode registers in non
20372 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20374 if (FLOAT_CLASS_P (regclass))
20391 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20393 if (SSE_CLASS_P (regclass))
20396 switch (GET_MODE_SIZE (mode))
20410 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20412 if (MMX_CLASS_P (regclass))
20415 switch (GET_MODE_SIZE (mode))
20426 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20428 switch (GET_MODE_SIZE (mode))
20432 return (Q_CLASS_P (regclass) ? ix86_cost->int_load[0]
20433 : ix86_cost->movzbl_load);
20435 return (Q_CLASS_P (regclass) ? ix86_cost->int_store[0]
20436 : ix86_cost->int_store[0] + 4);
20439 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20441 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20442 if (mode == TFmode)
20444 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
20445 * (((int) GET_MODE_SIZE (mode)
20446 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20450 /* Compute a (partial) cost for rtx X. Return true if the complete
20451 cost has been computed, and false if subexpressions should be
20452 scanned. In either case, *TOTAL contains the cost result. */
20455 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20457 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20458 enum machine_mode mode = GET_MODE (x);
20466 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20468 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20470 else if (flag_pic && SYMBOLIC_CONST (x)
20472 || (!GET_CODE (x) != LABEL_REF
20473 && (GET_CODE (x) != SYMBOL_REF
20474 || !SYMBOL_REF_LOCAL_P (x)))))
20481 if (mode == VOIDmode)
20484 switch (standard_80387_constant_p (x))
20489 default: /* Other constants */
20494 /* Start with (MEM (SYMBOL_REF)), since that's where
20495 it'll probably end up. Add a penalty for size. */
20496 *total = (COSTS_N_INSNS (1)
20497 + (flag_pic != 0 && !TARGET_64BIT)
20498 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20504 /* The zero extensions is often completely free on x86_64, so make
20505 it as cheap as possible. */
20506 if (TARGET_64BIT && mode == DImode
20507 && GET_MODE (XEXP (x, 0)) == SImode)
20509 else if (TARGET_ZERO_EXTEND_WITH_AND)
20510 *total = ix86_cost->add;
20512 *total = ix86_cost->movzx;
20516 *total = ix86_cost->movsx;
20520 if (CONST_INT_P (XEXP (x, 1))
20521 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20523 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20526 *total = ix86_cost->add;
20529 if ((value == 2 || value == 3)
20530 && ix86_cost->lea <= ix86_cost->shift_const)
20532 *total = ix86_cost->lea;
20542 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20544 if (CONST_INT_P (XEXP (x, 1)))
20546 if (INTVAL (XEXP (x, 1)) > 32)
20547 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20549 *total = ix86_cost->shift_const * 2;
20553 if (GET_CODE (XEXP (x, 1)) == AND)
20554 *total = ix86_cost->shift_var * 2;
20556 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20561 if (CONST_INT_P (XEXP (x, 1)))
20562 *total = ix86_cost->shift_const;
20564 *total = ix86_cost->shift_var;
20569 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20571 /* ??? SSE scalar cost should be used here. */
20572 *total = ix86_cost->fmul;
20575 else if (X87_FLOAT_MODE_P (mode))
20577 *total = ix86_cost->fmul;
20580 else if (FLOAT_MODE_P (mode))
20582 /* ??? SSE vector cost should be used here. */
20583 *total = ix86_cost->fmul;
20588 rtx op0 = XEXP (x, 0);
20589 rtx op1 = XEXP (x, 1);
20591 if (CONST_INT_P (XEXP (x, 1)))
20593 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20594 for (nbits = 0; value != 0; value &= value - 1)
20598 /* This is arbitrary. */
20601 /* Compute costs correctly for widening multiplication. */
20602 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
20603 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20604 == GET_MODE_SIZE (mode))
20606 int is_mulwiden = 0;
20607 enum machine_mode inner_mode = GET_MODE (op0);
20609 if (GET_CODE (op0) == GET_CODE (op1))
20610 is_mulwiden = 1, op1 = XEXP (op1, 0);
20611 else if (CONST_INT_P (op1))
20613 if (GET_CODE (op0) == SIGN_EXTEND)
20614 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20617 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20621 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20624 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20625 + nbits * ix86_cost->mult_bit
20626 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20635 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20636 /* ??? SSE cost should be used here. */
20637 *total = ix86_cost->fdiv;
20638 else if (X87_FLOAT_MODE_P (mode))
20639 *total = ix86_cost->fdiv;
20640 else if (FLOAT_MODE_P (mode))
20641 /* ??? SSE vector cost should be used here. */
20642 *total = ix86_cost->fdiv;
20644 *total = ix86_cost->divide[MODE_INDEX (mode)];
20648 if (GET_MODE_CLASS (mode) == MODE_INT
20649 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20651 if (GET_CODE (XEXP (x, 0)) == PLUS
20652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20653 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20654 && CONSTANT_P (XEXP (x, 1)))
20656 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20657 if (val == 2 || val == 4 || val == 8)
20659 *total = ix86_cost->lea;
20660 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20661 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20663 *total += rtx_cost (XEXP (x, 1), outer_code);
20667 else if (GET_CODE (XEXP (x, 0)) == MULT
20668 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20670 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20671 if (val == 2 || val == 4 || val == 8)
20673 *total = ix86_cost->lea;
20674 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20675 *total += rtx_cost (XEXP (x, 1), outer_code);
20679 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20681 *total = ix86_cost->lea;
20682 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20683 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20684 *total += rtx_cost (XEXP (x, 1), outer_code);
20691 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20693 /* ??? SSE cost should be used here. */
20694 *total = ix86_cost->fadd;
20697 else if (X87_FLOAT_MODE_P (mode))
20699 *total = ix86_cost->fadd;
20702 else if (FLOAT_MODE_P (mode))
20704 /* ??? SSE vector cost should be used here. */
20705 *total = ix86_cost->fadd;
20713 if (!TARGET_64BIT && mode == DImode)
20715 *total = (ix86_cost->add * 2
20716 + (rtx_cost (XEXP (x, 0), outer_code)
20717 << (GET_MODE (XEXP (x, 0)) != DImode))
20718 + (rtx_cost (XEXP (x, 1), outer_code)
20719 << (GET_MODE (XEXP (x, 1)) != DImode)));
20725 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20727 /* ??? SSE cost should be used here. */
20728 *total = ix86_cost->fchs;
20731 else if (X87_FLOAT_MODE_P (mode))
20733 *total = ix86_cost->fchs;
20736 else if (FLOAT_MODE_P (mode))
20738 /* ??? SSE vector cost should be used here. */
20739 *total = ix86_cost->fchs;
20745 if (!TARGET_64BIT && mode == DImode)
20746 *total = ix86_cost->add * 2;
20748 *total = ix86_cost->add;
20752 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20753 && XEXP (XEXP (x, 0), 1) == const1_rtx
20754 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20755 && XEXP (x, 1) == const0_rtx)
20757 /* This kind of construct is implemented using test[bwl].
20758 Treat it as if we had an AND. */
20759 *total = (ix86_cost->add
20760 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20761 + rtx_cost (const1_rtx, outer_code));
20767 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20772 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20773 /* ??? SSE cost should be used here. */
20774 *total = ix86_cost->fabs;
20775 else if (X87_FLOAT_MODE_P (mode))
20776 *total = ix86_cost->fabs;
20777 else if (FLOAT_MODE_P (mode))
20778 /* ??? SSE vector cost should be used here. */
20779 *total = ix86_cost->fabs;
20783 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20784 /* ??? SSE cost should be used here. */
20785 *total = ix86_cost->fsqrt;
20786 else if (X87_FLOAT_MODE_P (mode))
20787 *total = ix86_cost->fsqrt;
20788 else if (FLOAT_MODE_P (mode))
20789 /* ??? SSE vector cost should be used here. */
20790 *total = ix86_cost->fsqrt;
20794 if (XINT (x, 1) == UNSPEC_TP)
20805 static int current_machopic_label_num;
20807 /* Given a symbol name and its associated stub, write out the
20808 definition of the stub. */
20811 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20813 unsigned int length;
20814 char *binder_name, *symbol_name, lazy_ptr_name[32];
20815 int label = ++current_machopic_label_num;
20817 /* For 64-bit we shouldn't get here. */
20818 gcc_assert (!TARGET_64BIT);
20820 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20821 symb = (*targetm.strip_name_encoding) (symb);
20823 length = strlen (stub);
20824 binder_name = alloca (length + 32);
20825 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20827 length = strlen (symb);
20828 symbol_name = alloca (length + 32);
20829 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20831 sprintf (lazy_ptr_name, "L%d$lz", label);
20834 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20836 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20838 fprintf (file, "%s:\n", stub);
20839 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20843 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20844 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20845 fprintf (file, "\tjmp\t*%%edx\n");
20848 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20850 fprintf (file, "%s:\n", binder_name);
20854 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20855 fprintf (file, "\tpushl\t%%eax\n");
20858 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20860 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20862 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20863 fprintf (file, "%s:\n", lazy_ptr_name);
20864 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20865 fprintf (file, "\t.long %s\n", binder_name);
20869 darwin_x86_file_end (void)
20871 darwin_file_end ();
20874 #endif /* TARGET_MACHO */
20876 /* Order the registers for register allocator. */
20879 x86_order_regs_for_local_alloc (void)
20884 /* First allocate the local general purpose registers. */
20885 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20886 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20887 reg_alloc_order [pos++] = i;
20889 /* Global general purpose registers. */
20890 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20891 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20892 reg_alloc_order [pos++] = i;
20894 /* x87 registers come first in case we are doing FP math
20896 if (!TARGET_SSE_MATH)
20897 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20898 reg_alloc_order [pos++] = i;
20900 /* SSE registers. */
20901 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20902 reg_alloc_order [pos++] = i;
20903 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20904 reg_alloc_order [pos++] = i;
20906 /* x87 registers. */
20907 if (TARGET_SSE_MATH)
20908 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20909 reg_alloc_order [pos++] = i;
20911 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20912 reg_alloc_order [pos++] = i;
20914 /* Initialize the rest of array as we do not allocate some registers
20916 while (pos < FIRST_PSEUDO_REGISTER)
20917 reg_alloc_order [pos++] = 0;
20920 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20921 struct attribute_spec.handler. */
20923 ix86_handle_struct_attribute (tree *node, tree name,
20924 tree args ATTRIBUTE_UNUSED,
20925 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20928 if (DECL_P (*node))
20930 if (TREE_CODE (*node) == TYPE_DECL)
20931 type = &TREE_TYPE (*node);
20936 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20937 || TREE_CODE (*type) == UNION_TYPE)))
20939 warning (OPT_Wattributes, "%qs attribute ignored",
20940 IDENTIFIER_POINTER (name));
20941 *no_add_attrs = true;
20944 else if ((is_attribute_p ("ms_struct", name)
20945 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20946 || ((is_attribute_p ("gcc_struct", name)
20947 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20949 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
20950 IDENTIFIER_POINTER (name));
20951 *no_add_attrs = true;
20958 ix86_ms_bitfield_layout_p (tree record_type)
20960 return (TARGET_MS_BITFIELD_LAYOUT &&
20961 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20962 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20965 /* Returns an expression indicating where the this parameter is
20966 located on entry to the FUNCTION. */
20969 x86_this_parameter (tree function)
20971 tree type = TREE_TYPE (function);
20972 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20976 const int *parm_regs;
20978 if (TARGET_64BIT_MS_ABI)
20979 parm_regs = x86_64_ms_abi_int_parameter_registers;
20981 parm_regs = x86_64_int_parameter_registers;
20982 return gen_rtx_REG (DImode, parm_regs[aggr]);
20985 if (ix86_function_regparm (type, function) > 0
20986 && !type_has_variadic_args_p (type))
20989 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
20991 return gen_rtx_REG (SImode, regno);
20994 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
20997 /* Determine whether x86_output_mi_thunk can succeed. */
21000 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
21001 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21002 HOST_WIDE_INT vcall_offset, tree function)
21004 /* 64-bit can handle anything. */
21008 /* For 32-bit, everything's fine if we have one free register. */
21009 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21012 /* Need a free register for vcall_offset. */
21016 /* Need a free register for GOT references. */
21017 if (flag_pic && !(*targetm.binds_local_p) (function))
21020 /* Otherwise ok. */
21024 /* Output the assembler code for a thunk function. THUNK_DECL is the
21025 declaration for the thunk function itself, FUNCTION is the decl for
21026 the target function. DELTA is an immediate constant offset to be
21027 added to THIS. If VCALL_OFFSET is nonzero, the word at
21028 *(*this + vcall_offset) should be added to THIS. */
21031 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21032 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21033 HOST_WIDE_INT vcall_offset, tree function)
21036 rtx this_param = x86_this_parameter (function);
21039 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21040 pull it in now and let DELTA benefit. */
21041 if (REG_P (this_param))
21042 this_reg = this_param;
21043 else if (vcall_offset)
21045 /* Put the this parameter into %eax. */
21046 xops[0] = this_param;
21047 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21048 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21051 this_reg = NULL_RTX;
21053 /* Adjust the this parameter by a fixed constant. */
21056 xops[0] = GEN_INT (delta);
21057 xops[1] = this_reg ? this_reg : this_param;
21060 if (!x86_64_general_operand (xops[0], DImode))
21062 tmp = gen_rtx_REG (DImode, R10_REG);
21064 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21066 xops[1] = this_param;
21068 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21071 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21074 /* Adjust the this parameter by a value stored in the vtable. */
21078 tmp = gen_rtx_REG (DImode, R10_REG);
21081 int tmp_regno = 2 /* ECX */;
21082 if (lookup_attribute ("fastcall",
21083 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21084 tmp_regno = 0 /* EAX */;
21085 tmp = gen_rtx_REG (SImode, tmp_regno);
21088 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21091 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21093 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21095 /* Adjust the this parameter. */
21096 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21097 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21099 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21100 xops[0] = GEN_INT (vcall_offset);
21102 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21103 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21105 xops[1] = this_reg;
21107 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21109 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21112 /* If necessary, drop THIS back to its stack slot. */
21113 if (this_reg && this_reg != this_param)
21115 xops[0] = this_reg;
21116 xops[1] = this_param;
21117 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21120 xops[0] = XEXP (DECL_RTL (function), 0);
21123 if (!flag_pic || (*targetm.binds_local_p) (function))
21124 output_asm_insn ("jmp\t%P0", xops);
21125 /* All thunks should be in the same object as their target,
21126 and thus binds_local_p should be true. */
21127 else if (TARGET_64BIT_MS_ABI)
21128 gcc_unreachable ();
21131 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21132 tmp = gen_rtx_CONST (Pmode, tmp);
21133 tmp = gen_rtx_MEM (QImode, tmp);
21135 output_asm_insn ("jmp\t%A0", xops);
21140 if (!flag_pic || (*targetm.binds_local_p) (function))
21141 output_asm_insn ("jmp\t%P0", xops);
21146 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21147 tmp = (gen_rtx_SYMBOL_REF
21149 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21150 tmp = gen_rtx_MEM (QImode, tmp);
21152 output_asm_insn ("jmp\t%0", xops);
21155 #endif /* TARGET_MACHO */
21157 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21158 output_set_got (tmp, NULL_RTX);
21161 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21162 output_asm_insn ("jmp\t{*}%1", xops);
21168 x86_file_start (void)
21170 default_file_start ();
21172 darwin_file_start ();
21174 if (X86_FILE_START_VERSION_DIRECTIVE)
21175 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21176 if (X86_FILE_START_FLTUSED)
21177 fputs ("\t.global\t__fltused\n", asm_out_file);
21178 if (ix86_asm_dialect == ASM_INTEL)
21179 fputs ("\t.intel_syntax\n", asm_out_file);
21183 x86_field_alignment (tree field, int computed)
21185 enum machine_mode mode;
21186 tree type = TREE_TYPE (field);
21188 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21190 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21191 ? get_inner_array_type (type) : type);
21192 if (mode == DFmode || mode == DCmode
21193 || GET_MODE_CLASS (mode) == MODE_INT
21194 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21195 return MIN (32, computed);
21199 /* Output assembler code to FILE to increment profiler label # LABELNO
21200 for profiling a function entry. */
21202 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21206 #ifndef NO_PROFILE_COUNTERS
21207 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
21210 if (!TARGET_64BIT_MS_ABI && flag_pic)
21211 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21213 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21217 #ifndef NO_PROFILE_COUNTERS
21218 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21219 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21221 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21225 #ifndef NO_PROFILE_COUNTERS
21226 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21227 PROFILE_COUNT_REGISTER);
21229 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21233 /* We don't have exact information about the insn sizes, but we may assume
21234 quite safely that we are informed about all 1 byte insns and memory
21235 address sizes. This is enough to eliminate unnecessary padding in
21239 min_insn_size (rtx insn)
21243 if (!INSN_P (insn) || !active_insn_p (insn))
21246 /* Discard alignments we've emit and jump instructions. */
21247 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21248 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21251 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21252 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21255 /* Important case - calls are always 5 bytes.
21256 It is common to have many calls in the row. */
21258 && symbolic_reference_mentioned_p (PATTERN (insn))
21259 && !SIBLING_CALL_P (insn))
21261 if (get_attr_length (insn) <= 1)
21264 /* For normal instructions we may rely on the sizes of addresses
21265 and the presence of symbol to require 4 bytes of encoding.
21266 This is not the case for jumps where references are PC relative. */
21267 if (!JUMP_P (insn))
21269 l = get_attr_length_address (insn);
21270 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21279 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21283 ix86_avoid_jump_misspredicts (void)
21285 rtx insn, start = get_insns ();
21286 int nbytes = 0, njumps = 0;
21289 /* Look for all minimal intervals of instructions containing 4 jumps.
21290 The intervals are bounded by START and INSN. NBYTES is the total
21291 size of instructions in the interval including INSN and not including
21292 START. When the NBYTES is smaller than 16 bytes, it is possible
21293 that the end of START and INSN ends up in the same 16byte page.
21295 The smallest offset in the page INSN can start is the case where START
21296 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
21297 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
21299 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21302 nbytes += min_insn_size (insn);
21304 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21305 INSN_UID (insn), min_insn_size (insn));
21307 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21308 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21316 start = NEXT_INSN (start);
21317 if ((JUMP_P (start)
21318 && GET_CODE (PATTERN (start)) != ADDR_VEC
21319 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21321 njumps--, isjump = 1;
21324 nbytes -= min_insn_size (start);
21326 gcc_assert (njumps >= 0);
21328 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21329 INSN_UID (start), INSN_UID (insn), nbytes);
21331 if (njumps == 3 && isjump && nbytes < 16)
21333 int padsize = 15 - nbytes + min_insn_size (insn);
21336 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21337 INSN_UID (insn), padsize);
21338 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21343 /* AMD Athlon works faster
21344 when RET is not destination of conditional jump or directly preceded
21345 by other jump instruction. We avoid the penalty by inserting NOP just
21346 before the RET instructions in such cases. */
21348 ix86_pad_returns (void)
21353 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21355 basic_block bb = e->src;
21356 rtx ret = BB_END (bb);
21358 bool replace = false;
21360 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21361 || !maybe_hot_bb_p (bb))
21363 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21364 if (active_insn_p (prev) || LABEL_P (prev))
21366 if (prev && LABEL_P (prev))
21371 FOR_EACH_EDGE (e, ei, bb->preds)
21372 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21373 && !(e->flags & EDGE_FALLTHRU))
21378 prev = prev_active_insn (ret);
21380 && ((JUMP_P (prev) && any_condjump_p (prev))
21383 /* Empty functions get branch mispredict even when the jump destination
21384 is not visible to us. */
21385 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21390 emit_insn_before (gen_return_internal_long (), ret);
21396 /* Implement machine specific optimizations. We implement padding of returns
21397 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21401 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21402 ix86_pad_returns ();
21403 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21404 ix86_avoid_jump_misspredicts ();
21407 /* Return nonzero when QImode register that must be represented via REX prefix
21410 x86_extended_QIreg_mentioned_p (rtx insn)
21413 extract_insn_cached (insn);
21414 for (i = 0; i < recog_data.n_operands; i++)
21415 if (REG_P (recog_data.operand[i])
21416 && REGNO (recog_data.operand[i]) >= 4)
21421 /* Return nonzero when P points to register encoded via REX prefix.
21422 Called via for_each_rtx. */
21424 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21426 unsigned int regno;
21429 regno = REGNO (*p);
21430 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21433 /* Return true when INSN mentions register that must be encoded using REX
21436 x86_extended_reg_mentioned_p (rtx insn)
21438 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21441 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21442 optabs would emit if we didn't have TFmode patterns. */
21445 x86_emit_floatuns (rtx operands[2])
21447 rtx neglab, donelab, i0, i1, f0, in, out;
21448 enum machine_mode mode, inmode;
21450 inmode = GET_MODE (operands[1]);
21451 gcc_assert (inmode == SImode || inmode == DImode);
21454 in = force_reg (inmode, operands[1]);
21455 mode = GET_MODE (out);
21456 neglab = gen_label_rtx ();
21457 donelab = gen_label_rtx ();
21458 f0 = gen_reg_rtx (mode);
21460 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21462 expand_float (out, in, 0);
21464 emit_jump_insn (gen_jump (donelab));
21467 emit_label (neglab);
21469 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21471 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21473 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21475 expand_float (f0, i0, 0);
21477 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21479 emit_label (donelab);
21482 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21483 with all elements equal to VAR. Return true if successful. */
21486 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21487 rtx target, rtx val)
21489 enum machine_mode smode, wsmode, wvmode;
21504 val = force_reg (GET_MODE_INNER (mode), val);
21505 x = gen_rtx_VEC_DUPLICATE (mode, val);
21506 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21512 if (TARGET_SSE || TARGET_3DNOW_A)
21514 val = gen_lowpart (SImode, val);
21515 x = gen_rtx_TRUNCATE (HImode, val);
21516 x = gen_rtx_VEC_DUPLICATE (mode, x);
21517 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21539 /* Extend HImode to SImode using a paradoxical SUBREG. */
21540 tmp1 = gen_reg_rtx (SImode);
21541 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21542 /* Insert the SImode value as low element of V4SImode vector. */
21543 tmp2 = gen_reg_rtx (V4SImode);
21544 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21545 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21546 CONST0_RTX (V4SImode),
21548 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21549 /* Cast the V4SImode vector back to a V8HImode vector. */
21550 tmp1 = gen_reg_rtx (V8HImode);
21551 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21552 /* Duplicate the low short through the whole low SImode word. */
21553 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21554 /* Cast the V8HImode vector back to a V4SImode vector. */
21555 tmp2 = gen_reg_rtx (V4SImode);
21556 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21557 /* Replicate the low element of the V4SImode vector. */
21558 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21559 /* Cast the V2SImode back to V8HImode, and store in target. */
21560 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21571 /* Extend QImode to SImode using a paradoxical SUBREG. */
21572 tmp1 = gen_reg_rtx (SImode);
21573 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21574 /* Insert the SImode value as low element of V4SImode vector. */
21575 tmp2 = gen_reg_rtx (V4SImode);
21576 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21577 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21578 CONST0_RTX (V4SImode),
21580 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21581 /* Cast the V4SImode vector back to a V16QImode vector. */
21582 tmp1 = gen_reg_rtx (V16QImode);
21583 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21584 /* Duplicate the low byte through the whole low SImode word. */
21585 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21586 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21587 /* Cast the V16QImode vector back to a V4SImode vector. */
21588 tmp2 = gen_reg_rtx (V4SImode);
21589 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21590 /* Replicate the low element of the V4SImode vector. */
21591 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21592 /* Cast the V2SImode back to V16QImode, and store in target. */
21593 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21601 /* Replicate the value once into the next wider mode and recurse. */
21602 val = convert_modes (wsmode, smode, val, true);
21603 x = expand_simple_binop (wsmode, ASHIFT, val,
21604 GEN_INT (GET_MODE_BITSIZE (smode)),
21605 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21606 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21608 x = gen_reg_rtx (wvmode);
21609 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21610 gcc_unreachable ();
21611 emit_move_insn (target, gen_lowpart (mode, x));
21619 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21620 whose ONE_VAR element is VAR, and other elements are zero. Return true
21624 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21625 rtx target, rtx var, int one_var)
21627 enum machine_mode vsimode;
21643 var = force_reg (GET_MODE_INNER (mode), var);
21644 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21645 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21650 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21651 new_target = gen_reg_rtx (mode);
21653 new_target = target;
21654 var = force_reg (GET_MODE_INNER (mode), var);
21655 x = gen_rtx_VEC_DUPLICATE (mode, var);
21656 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21657 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21660 /* We need to shuffle the value to the correct position, so
21661 create a new pseudo to store the intermediate result. */
21663 /* With SSE2, we can use the integer shuffle insns. */
21664 if (mode != V4SFmode && TARGET_SSE2)
21666 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21668 GEN_INT (one_var == 1 ? 0 : 1),
21669 GEN_INT (one_var == 2 ? 0 : 1),
21670 GEN_INT (one_var == 3 ? 0 : 1)));
21671 if (target != new_target)
21672 emit_move_insn (target, new_target);
21676 /* Otherwise convert the intermediate result to V4SFmode and
21677 use the SSE1 shuffle instructions. */
21678 if (mode != V4SFmode)
21680 tmp = gen_reg_rtx (V4SFmode);
21681 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21686 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21688 GEN_INT (one_var == 1 ? 0 : 1),
21689 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21690 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21692 if (mode != V4SFmode)
21693 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21694 else if (tmp != target)
21695 emit_move_insn (target, tmp);
21697 else if (target != new_target)
21698 emit_move_insn (target, new_target);
21703 vsimode = V4SImode;
21709 vsimode = V2SImode;
21715 /* Zero extend the variable element to SImode and recurse. */
21716 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21718 x = gen_reg_rtx (vsimode);
21719 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21721 gcc_unreachable ();
21723 emit_move_insn (target, gen_lowpart (mode, x));
21731 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21732 consisting of the values in VALS. It is known that all elements
21733 except ONE_VAR are constants. Return true if successful. */
21736 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21737 rtx target, rtx vals, int one_var)
21739 rtx var = XVECEXP (vals, 0, one_var);
21740 enum machine_mode wmode;
21743 const_vec = copy_rtx (vals);
21744 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21745 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21753 /* For the two element vectors, it's just as easy to use
21754 the general case. */
21770 /* There's no way to set one QImode entry easily. Combine
21771 the variable value with its adjacent constant value, and
21772 promote to an HImode set. */
21773 x = XVECEXP (vals, 0, one_var ^ 1);
21776 var = convert_modes (HImode, QImode, var, true);
21777 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21778 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21779 x = GEN_INT (INTVAL (x) & 0xff);
21783 var = convert_modes (HImode, QImode, var, true);
21784 x = gen_int_mode (INTVAL (x) << 8, HImode);
21786 if (x != const0_rtx)
21787 var = expand_simple_binop (HImode, IOR, var, x, var,
21788 1, OPTAB_LIB_WIDEN);
21790 x = gen_reg_rtx (wmode);
21791 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21792 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21794 emit_move_insn (target, gen_lowpart (mode, x));
21801 emit_move_insn (target, const_vec);
21802 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21806 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21807 all values variable, and none identical. */
21810 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21811 rtx target, rtx vals)
21813 enum machine_mode half_mode = GET_MODE_INNER (mode);
21814 rtx op0 = NULL, op1 = NULL;
21815 bool use_vec_concat = false;
21821 if (!mmx_ok && !TARGET_SSE)
21827 /* For the two element vectors, we always implement VEC_CONCAT. */
21828 op0 = XVECEXP (vals, 0, 0);
21829 op1 = XVECEXP (vals, 0, 1);
21830 use_vec_concat = true;
21834 half_mode = V2SFmode;
21837 half_mode = V2SImode;
21843 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21844 Recurse to load the two halves. */
21846 op0 = gen_reg_rtx (half_mode);
21847 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21848 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21850 op1 = gen_reg_rtx (half_mode);
21851 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21852 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21854 use_vec_concat = true;
21865 gcc_unreachable ();
21868 if (use_vec_concat)
21870 if (!register_operand (op0, half_mode))
21871 op0 = force_reg (half_mode, op0);
21872 if (!register_operand (op1, half_mode))
21873 op1 = force_reg (half_mode, op1);
21875 emit_insn (gen_rtx_SET (VOIDmode, target,
21876 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21880 int i, j, n_elts, n_words, n_elt_per_word;
21881 enum machine_mode inner_mode;
21882 rtx words[4], shift;
21884 inner_mode = GET_MODE_INNER (mode);
21885 n_elts = GET_MODE_NUNITS (mode);
21886 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21887 n_elt_per_word = n_elts / n_words;
21888 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21890 for (i = 0; i < n_words; ++i)
21892 rtx word = NULL_RTX;
21894 for (j = 0; j < n_elt_per_word; ++j)
21896 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21897 elt = convert_modes (word_mode, inner_mode, elt, true);
21903 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21904 word, 1, OPTAB_LIB_WIDEN);
21905 word = expand_simple_binop (word_mode, IOR, word, elt,
21906 word, 1, OPTAB_LIB_WIDEN);
21914 emit_move_insn (target, gen_lowpart (mode, words[0]));
21915 else if (n_words == 2)
21917 rtx tmp = gen_reg_rtx (mode);
21918 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21919 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21920 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21921 emit_move_insn (target, tmp);
21923 else if (n_words == 4)
21925 rtx tmp = gen_reg_rtx (V4SImode);
21926 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21927 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21928 emit_move_insn (target, gen_lowpart (mode, tmp));
21931 gcc_unreachable ();
21935 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21936 instructions unless MMX_OK is true. */
21939 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
21941 enum machine_mode mode = GET_MODE (target);
21942 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21943 int n_elts = GET_MODE_NUNITS (mode);
21944 int n_var = 0, one_var = -1;
21945 bool all_same = true, all_const_zero = true;
21949 for (i = 0; i < n_elts; ++i)
21951 x = XVECEXP (vals, 0, i);
21952 if (!CONSTANT_P (x))
21953 n_var++, one_var = i;
21954 else if (x != CONST0_RTX (inner_mode))
21955 all_const_zero = false;
21956 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
21960 /* Constants are best loaded from the constant pool. */
21963 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
21967 /* If all values are identical, broadcast the value. */
21969 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
21970 XVECEXP (vals, 0, 0)))
21973 /* Values where only one field is non-constant are best loaded from
21974 the pool and overwritten via move later. */
21978 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
21979 XVECEXP (vals, 0, one_var),
21983 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
21987 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
21991 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
21993 enum machine_mode mode = GET_MODE (target);
21994 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21995 bool use_vec_merge = false;
22004 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22005 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22007 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22009 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22010 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22016 use_vec_merge = TARGET_SSE4_1;
22024 /* For the two element vectors, we implement a VEC_CONCAT with
22025 the extraction of the other element. */
22027 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22028 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22031 op0 = val, op1 = tmp;
22033 op0 = tmp, op1 = val;
22035 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22036 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22041 use_vec_merge = TARGET_SSE4_1;
22048 use_vec_merge = true;
22052 /* tmp = target = A B C D */
22053 tmp = copy_to_reg (target);
22054 /* target = A A B B */
22055 emit_insn (gen_sse_unpcklps (target, target, target));
22056 /* target = X A B B */
22057 ix86_expand_vector_set (false, target, val, 0);
22058 /* target = A X C D */
22059 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22060 GEN_INT (1), GEN_INT (0),
22061 GEN_INT (2+4), GEN_INT (3+4)));
22065 /* tmp = target = A B C D */
22066 tmp = copy_to_reg (target);
22067 /* tmp = X B C D */
22068 ix86_expand_vector_set (false, tmp, val, 0);
22069 /* target = A B X D */
22070 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22071 GEN_INT (0), GEN_INT (1),
22072 GEN_INT (0+4), GEN_INT (3+4)));
22076 /* tmp = target = A B C D */
22077 tmp = copy_to_reg (target);
22078 /* tmp = X B C D */
22079 ix86_expand_vector_set (false, tmp, val, 0);
22080 /* target = A B X D */
22081 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22082 GEN_INT (0), GEN_INT (1),
22083 GEN_INT (2+4), GEN_INT (0+4)));
22087 gcc_unreachable ();
22092 use_vec_merge = TARGET_SSE4_1;
22096 /* Element 0 handled by vec_merge below. */
22099 use_vec_merge = true;
22105 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22106 store into element 0, then shuffle them back. */
22110 order[0] = GEN_INT (elt);
22111 order[1] = const1_rtx;
22112 order[2] = const2_rtx;
22113 order[3] = GEN_INT (3);
22114 order[elt] = const0_rtx;
22116 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22117 order[1], order[2], order[3]));
22119 ix86_expand_vector_set (false, target, val, 0);
22121 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22122 order[1], order[2], order[3]));
22126 /* For SSE1, we have to reuse the V4SF code. */
22127 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22128 gen_lowpart (SFmode, val), elt);
22133 use_vec_merge = TARGET_SSE2;
22136 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22140 use_vec_merge = TARGET_SSE4_1;
22150 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22151 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22152 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22156 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22158 emit_move_insn (mem, target);
22160 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22161 emit_move_insn (tmp, val);
22163 emit_move_insn (target, mem);
22168 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22170 enum machine_mode mode = GET_MODE (vec);
22171 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22172 bool use_vec_extr = false;
22185 use_vec_extr = true;
22189 use_vec_extr = TARGET_SSE4_1;
22201 tmp = gen_reg_rtx (mode);
22202 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22203 GEN_INT (elt), GEN_INT (elt),
22204 GEN_INT (elt+4), GEN_INT (elt+4)));
22208 tmp = gen_reg_rtx (mode);
22209 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22213 gcc_unreachable ();
22216 use_vec_extr = true;
22221 use_vec_extr = TARGET_SSE4_1;
22235 tmp = gen_reg_rtx (mode);
22236 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22237 GEN_INT (elt), GEN_INT (elt),
22238 GEN_INT (elt), GEN_INT (elt)));
22242 tmp = gen_reg_rtx (mode);
22243 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22247 gcc_unreachable ();
22250 use_vec_extr = true;
22255 /* For SSE1, we have to reuse the V4SF code. */
22256 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22257 gen_lowpart (V4SFmode, vec), elt);
22263 use_vec_extr = TARGET_SSE2;
22266 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22270 use_vec_extr = TARGET_SSE4_1;
22274 /* ??? Could extract the appropriate HImode element and shift. */
22281 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22282 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22284 /* Let the rtl optimizers know about the zero extension performed. */
22285 if (inner_mode == QImode || inner_mode == HImode)
22287 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22288 target = gen_lowpart (SImode, target);
22291 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22295 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22297 emit_move_insn (mem, vec);
22299 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22300 emit_move_insn (target, tmp);
22304 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22305 pattern to reduce; DEST is the destination; IN is the input vector. */
22308 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22310 rtx tmp1, tmp2, tmp3;
22312 tmp1 = gen_reg_rtx (V4SFmode);
22313 tmp2 = gen_reg_rtx (V4SFmode);
22314 tmp3 = gen_reg_rtx (V4SFmode);
22316 emit_insn (gen_sse_movhlps (tmp1, in, in));
22317 emit_insn (fn (tmp2, tmp1, in));
22319 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22320 GEN_INT (1), GEN_INT (1),
22321 GEN_INT (1+4), GEN_INT (1+4)));
22322 emit_insn (fn (dest, tmp2, tmp3));
22325 /* Target hook for scalar_mode_supported_p. */
22327 ix86_scalar_mode_supported_p (enum machine_mode mode)
22329 if (DECIMAL_FLOAT_MODE_P (mode))
22331 else if (mode == TFmode)
22332 return TARGET_64BIT;
22334 return default_scalar_mode_supported_p (mode);
22337 /* Implements target hook vector_mode_supported_p. */
22339 ix86_vector_mode_supported_p (enum machine_mode mode)
22341 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22343 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22345 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22347 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22352 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22354 We do this in the new i386 backend to maintain source compatibility
22355 with the old cc0-based compiler. */
22358 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22359 tree inputs ATTRIBUTE_UNUSED,
22362 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22364 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22369 /* Implements target vector targetm.asm.encode_section_info. This
22370 is not used by netware. */
22372 static void ATTRIBUTE_UNUSED
22373 ix86_encode_section_info (tree decl, rtx rtl, int first)
22375 default_encode_section_info (decl, rtl, first);
22377 if (TREE_CODE (decl) == VAR_DECL
22378 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22379 && ix86_in_large_data_p (decl))
22380 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22383 /* Worker function for REVERSE_CONDITION. */
22386 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22388 return (mode != CCFPmode && mode != CCFPUmode
22389 ? reverse_condition (code)
22390 : reverse_condition_maybe_unordered (code));
22393 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22397 output_387_reg_move (rtx insn, rtx *operands)
22399 if (REG_P (operands[0]))
22401 if (REG_P (operands[1])
22402 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22404 if (REGNO (operands[0]) == FIRST_STACK_REG)
22405 return output_387_ffreep (operands, 0);
22406 return "fstp\t%y0";
22408 if (STACK_TOP_P (operands[0]))
22409 return "fld%z1\t%y1";
22412 else if (MEM_P (operands[0]))
22414 gcc_assert (REG_P (operands[1]));
22415 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22416 return "fstp%z0\t%y0";
22419 /* There is no non-popping store to memory for XFmode.
22420 So if we need one, follow the store with a load. */
22421 if (GET_MODE (operands[0]) == XFmode)
22422 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22424 return "fst%z0\t%y0";
22431 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22432 FP status register is set. */
22435 ix86_emit_fp_unordered_jump (rtx label)
22437 rtx reg = gen_reg_rtx (HImode);
22440 emit_insn (gen_x86_fnstsw_1 (reg));
22442 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22444 emit_insn (gen_x86_sahf_1 (reg));
22446 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22447 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22451 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22453 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22454 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22457 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22458 gen_rtx_LABEL_REF (VOIDmode, label),
22460 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22462 emit_jump_insn (temp);
22463 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22466 /* Output code to perform a log1p XFmode calculation. */
22468 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22470 rtx label1 = gen_label_rtx ();
22471 rtx label2 = gen_label_rtx ();
22473 rtx tmp = gen_reg_rtx (XFmode);
22474 rtx tmp2 = gen_reg_rtx (XFmode);
22476 emit_insn (gen_absxf2 (tmp, op1));
22477 emit_insn (gen_cmpxf (tmp,
22478 CONST_DOUBLE_FROM_REAL_VALUE (
22479 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22481 emit_jump_insn (gen_bge (label1));
22483 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22484 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22485 emit_jump (label2);
22487 emit_label (label1);
22488 emit_move_insn (tmp, CONST1_RTX (XFmode));
22489 emit_insn (gen_addxf3 (tmp, op1, tmp));
22490 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22491 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22493 emit_label (label2);
22496 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22498 static void ATTRIBUTE_UNUSED
22499 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22502 /* With Binutils 2.15, the "@unwind" marker must be specified on
22503 every occurrence of the ".eh_frame" section, not just the first
22506 && strcmp (name, ".eh_frame") == 0)
22508 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22509 flags & SECTION_WRITE ? "aw" : "a");
22512 default_elf_asm_named_section (name, flags, decl);
22515 /* Return the mangling of TYPE if it is an extended fundamental type. */
22517 static const char *
22518 ix86_mangle_fundamental_type (tree type)
22520 switch (TYPE_MODE (type))
22523 /* __float128 is "g". */
22526 /* "long double" or __float80 is "e". */
22533 /* For 32-bit code we can save PIC register setup by using
22534 __stack_chk_fail_local hidden function instead of calling
22535 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
22536 register, so it is better to call __stack_chk_fail directly. */
22539 ix86_stack_protect_fail (void)
22541 return TARGET_64BIT
22542 ? default_external_stack_protect_fail ()
22543 : default_hidden_stack_protect_fail ();
22546 /* Select a format to encode pointers in exception handling data. CODE
22547 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22548 true if the symbol may be affected by dynamic relocations.
22550 ??? All x86 object file formats are capable of representing this.
22551 After all, the relocation needed is the same as for the call insn.
22552 Whether or not a particular assembler allows us to enter such, I
22553 guess we'll have to see. */
22555 asm_preferred_eh_data_format (int code, int global)
22559 int type = DW_EH_PE_sdata8;
22561 || ix86_cmodel == CM_SMALL_PIC
22562 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22563 type = DW_EH_PE_sdata4;
22564 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22566 if (ix86_cmodel == CM_SMALL
22567 || (ix86_cmodel == CM_MEDIUM && code))
22568 return DW_EH_PE_udata4;
22569 return DW_EH_PE_absptr;
22572 /* Expand copysign from SIGN to the positive value ABS_VALUE
22573 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
22576 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22578 enum machine_mode mode = GET_MODE (sign);
22579 rtx sgn = gen_reg_rtx (mode);
22580 if (mask == NULL_RTX)
22582 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22583 if (!VECTOR_MODE_P (mode))
22585 /* We need to generate a scalar mode mask in this case. */
22586 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22587 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22588 mask = gen_reg_rtx (mode);
22589 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22593 mask = gen_rtx_NOT (mode, mask);
22594 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22595 gen_rtx_AND (mode, mask, sign)));
22596 emit_insn (gen_rtx_SET (VOIDmode, result,
22597 gen_rtx_IOR (mode, abs_value, sgn)));
22600 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22601 mask for masking out the sign-bit is stored in *SMASK, if that is
22604 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22606 enum machine_mode mode = GET_MODE (op0);
22609 xa = gen_reg_rtx (mode);
22610 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22611 if (!VECTOR_MODE_P (mode))
22613 /* We need to generate a scalar mode mask in this case. */
22614 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22615 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22616 mask = gen_reg_rtx (mode);
22617 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22619 emit_insn (gen_rtx_SET (VOIDmode, xa,
22620 gen_rtx_AND (mode, op0, mask)));
22628 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22629 swapping the operands if SWAP_OPERANDS is true. The expanded
22630 code is a forward jump to a newly created label in case the
22631 comparison is true. The generated label rtx is returned. */
22633 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22634 bool swap_operands)
22645 label = gen_label_rtx ();
22646 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22647 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22648 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22649 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22650 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22651 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22652 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22653 JUMP_LABEL (tmp) = label;
22658 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22659 using comparison code CODE. Operands are swapped for the comparison if
22660 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22662 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22663 bool swap_operands)
22665 enum machine_mode mode = GET_MODE (op0);
22666 rtx mask = gen_reg_rtx (mode);
22675 if (mode == DFmode)
22676 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22677 gen_rtx_fmt_ee (code, mode, op0, op1)));
22679 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22680 gen_rtx_fmt_ee (code, mode, op0, op1)));
22685 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22686 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
22688 ix86_gen_TWO52 (enum machine_mode mode)
22690 REAL_VALUE_TYPE TWO52r;
22693 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22694 TWO52 = const_double_from_real_value (TWO52r, mode);
22695 TWO52 = force_reg (mode, TWO52);
22700 /* Expand SSE sequence for computing lround from OP1 storing
22703 ix86_expand_lround (rtx op0, rtx op1)
22705 /* C code for the stuff we're doing below:
22706 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22709 enum machine_mode mode = GET_MODE (op1);
22710 const struct real_format *fmt;
22711 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22714 /* load nextafter (0.5, 0.0) */
22715 fmt = REAL_MODE_FORMAT (mode);
22716 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22717 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22719 /* adj = copysign (0.5, op1) */
22720 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22721 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22723 /* adj = op1 + adj */
22724 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22726 /* op0 = (imode)adj */
22727 expand_fix (op0, adj, 0);
22730 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
22733 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22735 /* C code for the stuff we're doing below (for do_floor):
22737 xi -= (double)xi > op1 ? 1 : 0;
22740 enum machine_mode fmode = GET_MODE (op1);
22741 enum machine_mode imode = GET_MODE (op0);
22742 rtx ireg, freg, label, tmp;
22744 /* reg = (long)op1 */
22745 ireg = gen_reg_rtx (imode);
22746 expand_fix (ireg, op1, 0);
22748 /* freg = (double)reg */
22749 freg = gen_reg_rtx (fmode);
22750 expand_float (freg, ireg, 0);
22752 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22753 label = ix86_expand_sse_compare_and_jump (UNLE,
22754 freg, op1, !do_floor);
22755 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22756 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22757 emit_move_insn (ireg, tmp);
22759 emit_label (label);
22760 LABEL_NUSES (label) = 1;
22762 emit_move_insn (op0, ireg);
22765 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22766 result in OPERAND0. */
22768 ix86_expand_rint (rtx operand0, rtx operand1)
22770 /* C code for the stuff we're doing below:
22771 xa = fabs (operand1);
22772 if (!isless (xa, 2**52))
22774 xa = xa + 2**52 - 2**52;
22775 return copysign (xa, operand1);
22777 enum machine_mode mode = GET_MODE (operand0);
22778 rtx res, xa, label, TWO52, mask;
22780 res = gen_reg_rtx (mode);
22781 emit_move_insn (res, operand1);
22783 /* xa = abs (operand1) */
22784 xa = ix86_expand_sse_fabs (res, &mask);
22786 /* if (!isless (xa, TWO52)) goto label; */
22787 TWO52 = ix86_gen_TWO52 (mode);
22788 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22790 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22791 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22793 ix86_sse_copysign_to_positive (res, xa, res, mask);
22795 emit_label (label);
22796 LABEL_NUSES (label) = 1;
22798 emit_move_insn (operand0, res);
22801 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22804 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22806 /* C code for the stuff we expand below.
22807 double xa = fabs (x), x2;
22808 if (!isless (xa, TWO52))
22810 xa = xa + TWO52 - TWO52;
22811 x2 = copysign (xa, x);
22820 enum machine_mode mode = GET_MODE (operand0);
22821 rtx xa, TWO52, tmp, label, one, res, mask;
22823 TWO52 = ix86_gen_TWO52 (mode);
22825 /* Temporary for holding the result, initialized to the input
22826 operand to ease control flow. */
22827 res = gen_reg_rtx (mode);
22828 emit_move_insn (res, operand1);
22830 /* xa = abs (operand1) */
22831 xa = ix86_expand_sse_fabs (res, &mask);
22833 /* if (!isless (xa, TWO52)) goto label; */
22834 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22836 /* xa = xa + TWO52 - TWO52; */
22837 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22838 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22840 /* xa = copysign (xa, operand1) */
22841 ix86_sse_copysign_to_positive (xa, xa, res, mask);
22843 /* generate 1.0 or -1.0 */
22844 one = force_reg (mode,
22845 const_double_from_real_value (do_floor
22846 ? dconst1 : dconstm1, mode));
22848 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22849 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22850 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22851 gen_rtx_AND (mode, one, tmp)));
22852 /* We always need to subtract here to preserve signed zero. */
22853 tmp = expand_simple_binop (mode, MINUS,
22854 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22855 emit_move_insn (res, tmp);
22857 emit_label (label);
22858 LABEL_NUSES (label) = 1;
22860 emit_move_insn (operand0, res);
22863 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22866 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
22868 /* C code for the stuff we expand below.
22869 double xa = fabs (x), x2;
22870 if (!isless (xa, TWO52))
22872 x2 = (double)(long)x;
22879 if (HONOR_SIGNED_ZEROS (mode))
22880 return copysign (x2, x);
22883 enum machine_mode mode = GET_MODE (operand0);
22884 rtx xa, xi, TWO52, tmp, label, one, res, mask;
22886 TWO52 = ix86_gen_TWO52 (mode);
22888 /* Temporary for holding the result, initialized to the input
22889 operand to ease control flow. */
22890 res = gen_reg_rtx (mode);
22891 emit_move_insn (res, operand1);
22893 /* xa = abs (operand1) */
22894 xa = ix86_expand_sse_fabs (res, &mask);
22896 /* if (!isless (xa, TWO52)) goto label; */
22897 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22899 /* xa = (double)(long)x */
22900 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22901 expand_fix (xi, res, 0);
22902 expand_float (xa, xi, 0);
22905 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22907 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22908 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22909 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22910 gen_rtx_AND (mode, one, tmp)));
22911 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
22912 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22913 emit_move_insn (res, tmp);
22915 if (HONOR_SIGNED_ZEROS (mode))
22916 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22918 emit_label (label);
22919 LABEL_NUSES (label) = 1;
22921 emit_move_insn (operand0, res);
22924 /* Expand SSE sequence for computing round from OPERAND1 storing
22925 into OPERAND0. Sequence that works without relying on DImode truncation
22926 via cvttsd2siq that is only available on 64bit targets. */
22928 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
22930 /* C code for the stuff we expand below.
22931 double xa = fabs (x), xa2, x2;
22932 if (!isless (xa, TWO52))
22934 Using the absolute value and copying back sign makes
22935 -0.0 -> -0.0 correct.
22936 xa2 = xa + TWO52 - TWO52;
22941 else if (dxa > 0.5)
22943 x2 = copysign (xa2, x);
22946 enum machine_mode mode = GET_MODE (operand0);
22947 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
22949 TWO52 = ix86_gen_TWO52 (mode);
22951 /* Temporary for holding the result, initialized to the input
22952 operand to ease control flow. */
22953 res = gen_reg_rtx (mode);
22954 emit_move_insn (res, operand1);
22956 /* xa = abs (operand1) */
22957 xa = ix86_expand_sse_fabs (res, &mask);
22959 /* if (!isless (xa, TWO52)) goto label; */
22960 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22962 /* xa2 = xa + TWO52 - TWO52; */
22963 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22964 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
22966 /* dxa = xa2 - xa; */
22967 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
22969 /* generate 0.5, 1.0 and -0.5 */
22970 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
22971 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
22972 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
22976 tmp = gen_reg_rtx (mode);
22977 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
22978 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
22979 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22980 gen_rtx_AND (mode, one, tmp)));
22981 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22982 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
22983 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
22984 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22985 gen_rtx_AND (mode, one, tmp)));
22986 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22988 /* res = copysign (xa2, operand1) */
22989 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
22991 emit_label (label);
22992 LABEL_NUSES (label) = 1;
22994 emit_move_insn (operand0, res);
22997 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23000 ix86_expand_trunc (rtx operand0, rtx operand1)
23002 /* C code for SSE variant we expand below.
23003 double xa = fabs (x), x2;
23004 if (!isless (xa, TWO52))
23006 x2 = (double)(long)x;
23007 if (HONOR_SIGNED_ZEROS (mode))
23008 return copysign (x2, x);
23011 enum machine_mode mode = GET_MODE (operand0);
23012 rtx xa, xi, TWO52, label, res, mask;
23014 TWO52 = ix86_gen_TWO52 (mode);
23016 /* Temporary for holding the result, initialized to the input
23017 operand to ease control flow. */
23018 res = gen_reg_rtx (mode);
23019 emit_move_insn (res, operand1);
23021 /* xa = abs (operand1) */
23022 xa = ix86_expand_sse_fabs (res, &mask);
23024 /* if (!isless (xa, TWO52)) goto label; */
23025 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23027 /* x = (double)(long)x */
23028 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23029 expand_fix (xi, res, 0);
23030 expand_float (res, xi, 0);
23032 if (HONOR_SIGNED_ZEROS (mode))
23033 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23035 emit_label (label);
23036 LABEL_NUSES (label) = 1;
23038 emit_move_insn (operand0, res);
23041 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23044 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23046 enum machine_mode mode = GET_MODE (operand0);
23047 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23049 /* C code for SSE variant we expand below.
23050 double xa = fabs (x), x2;
23051 if (!isless (xa, TWO52))
23053 xa2 = xa + TWO52 - TWO52;
23057 x2 = copysign (xa2, x);
23061 TWO52 = ix86_gen_TWO52 (mode);
23063 /* Temporary for holding the result, initialized to the input
23064 operand to ease control flow. */
23065 res = gen_reg_rtx (mode);
23066 emit_move_insn (res, operand1);
23068 /* xa = abs (operand1) */
23069 xa = ix86_expand_sse_fabs (res, &smask);
23071 /* if (!isless (xa, TWO52)) goto label; */
23072 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23074 /* res = xa + TWO52 - TWO52; */
23075 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23076 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23077 emit_move_insn (res, tmp);
23080 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23082 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23083 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23084 emit_insn (gen_rtx_SET (VOIDmode, mask,
23085 gen_rtx_AND (mode, mask, one)));
23086 tmp = expand_simple_binop (mode, MINUS,
23087 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23088 emit_move_insn (res, tmp);
23090 /* res = copysign (res, operand1) */
23091 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23093 emit_label (label);
23094 LABEL_NUSES (label) = 1;
23096 emit_move_insn (operand0, res);
23099 /* Expand SSE sequence for computing round from OPERAND1 storing
23102 ix86_expand_round (rtx operand0, rtx operand1)
23104 /* C code for the stuff we're doing below:
23105 double xa = fabs (x);
23106 if (!isless (xa, TWO52))
23108 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23109 return copysign (xa, x);
23111 enum machine_mode mode = GET_MODE (operand0);
23112 rtx res, TWO52, xa, label, xi, half, mask;
23113 const struct real_format *fmt;
23114 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23116 /* Temporary for holding the result, initialized to the input
23117 operand to ease control flow. */
23118 res = gen_reg_rtx (mode);
23119 emit_move_insn (res, operand1);
23121 TWO52 = ix86_gen_TWO52 (mode);
23122 xa = ix86_expand_sse_fabs (res, &mask);
23123 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23125 /* load nextafter (0.5, 0.0) */
23126 fmt = REAL_MODE_FORMAT (mode);
23127 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
23128 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23130 /* xa = xa + 0.5 */
23131 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23132 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23134 /* xa = (double)(int64_t)xa */
23135 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23136 expand_fix (xi, xa, 0);
23137 expand_float (xa, xi, 0);
23139 /* res = copysign (xa, operand1) */
23140 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23142 emit_label (label);
23143 LABEL_NUSES (label) = 1;
23145 emit_move_insn (operand0, res);
23149 /* Table of valid machine attributes. */
23150 static const struct attribute_spec ix86_attribute_table[] =
23152 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23153 /* Stdcall attribute says callee is responsible for popping arguments
23154 if they are not variable. */
23155 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23156 /* Fastcall attribute says callee is responsible for popping arguments
23157 if they are not variable. */
23158 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23159 /* Cdecl attribute says the callee is a normal C declaration */
23160 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23161 /* Regparm attribute specifies how many integer arguments are to be
23162 passed in registers. */
23163 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23164 /* Sseregparm attribute says we are using x86_64 calling conventions
23165 for FP arguments. */
23166 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23167 /* force_align_arg_pointer says this function realigns the stack at entry. */
23168 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23169 false, true, true, ix86_handle_cconv_attribute },
23170 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23171 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23172 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23173 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23175 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23176 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23177 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23178 SUBTARGET_ATTRIBUTE_TABLE,
23180 { NULL, 0, 0, false, false, false, NULL }
23183 /* Initialize the GCC target structure. */
23184 #undef TARGET_ATTRIBUTE_TABLE
23185 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23186 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23187 # undef TARGET_MERGE_DECL_ATTRIBUTES
23188 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23191 #undef TARGET_COMP_TYPE_ATTRIBUTES
23192 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23194 #undef TARGET_INIT_BUILTINS
23195 #define TARGET_INIT_BUILTINS ix86_init_builtins
23196 #undef TARGET_EXPAND_BUILTIN
23197 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23199 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23200 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
23201 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23202 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
23204 #undef TARGET_ASM_FUNCTION_EPILOGUE
23205 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23207 #undef TARGET_ENCODE_SECTION_INFO
23208 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23209 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23211 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23214 #undef TARGET_ASM_OPEN_PAREN
23215 #define TARGET_ASM_OPEN_PAREN ""
23216 #undef TARGET_ASM_CLOSE_PAREN
23217 #define TARGET_ASM_CLOSE_PAREN ""
23219 #undef TARGET_ASM_ALIGNED_HI_OP
23220 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23221 #undef TARGET_ASM_ALIGNED_SI_OP
23222 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23224 #undef TARGET_ASM_ALIGNED_DI_OP
23225 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23228 #undef TARGET_ASM_UNALIGNED_HI_OP
23229 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23230 #undef TARGET_ASM_UNALIGNED_SI_OP
23231 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23232 #undef TARGET_ASM_UNALIGNED_DI_OP
23233 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23235 #undef TARGET_SCHED_ADJUST_COST
23236 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23237 #undef TARGET_SCHED_ISSUE_RATE
23238 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23239 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23240 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23241 ia32_multipass_dfa_lookahead
23243 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23244 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23247 #undef TARGET_HAVE_TLS
23248 #define TARGET_HAVE_TLS true
23250 #undef TARGET_CANNOT_FORCE_CONST_MEM
23251 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23252 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23253 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
23255 #undef TARGET_DELEGITIMIZE_ADDRESS
23256 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23258 #undef TARGET_MS_BITFIELD_LAYOUT_P
23259 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23262 #undef TARGET_BINDS_LOCAL_P
23263 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23265 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23266 #undef TARGET_BINDS_LOCAL_P
23267 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23270 #undef TARGET_ASM_OUTPUT_MI_THUNK
23271 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23272 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23273 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23275 #undef TARGET_ASM_FILE_START
23276 #define TARGET_ASM_FILE_START x86_file_start
23278 #undef TARGET_DEFAULT_TARGET_FLAGS
23279 #define TARGET_DEFAULT_TARGET_FLAGS \
23281 | TARGET_SUBTARGET_DEFAULT \
23282 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23284 #undef TARGET_HANDLE_OPTION
23285 #define TARGET_HANDLE_OPTION ix86_handle_option
23287 #undef TARGET_RTX_COSTS
23288 #define TARGET_RTX_COSTS ix86_rtx_costs
23289 #undef TARGET_ADDRESS_COST
23290 #define TARGET_ADDRESS_COST ix86_address_cost
23292 #undef TARGET_FIXED_CONDITION_CODE_REGS
23293 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23294 #undef TARGET_CC_MODES_COMPATIBLE
23295 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23297 #undef TARGET_MACHINE_DEPENDENT_REORG
23298 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23300 #undef TARGET_BUILD_BUILTIN_VA_LIST
23301 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23303 #undef TARGET_MD_ASM_CLOBBERS
23304 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23306 #undef TARGET_PROMOTE_PROTOTYPES
23307 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
23308 #undef TARGET_STRUCT_VALUE_RTX
23309 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23310 #undef TARGET_SETUP_INCOMING_VARARGS
23311 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23312 #undef TARGET_MUST_PASS_IN_STACK
23313 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23314 #undef TARGET_PASS_BY_REFERENCE
23315 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23316 #undef TARGET_INTERNAL_ARG_POINTER
23317 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23318 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23319 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23320 #undef TARGET_STRICT_ARGUMENT_NAMING
23321 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23323 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23324 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23326 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23327 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23329 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23330 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23333 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23334 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23337 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23338 #undef TARGET_INSERT_ATTRIBUTES
23339 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23342 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
23343 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
23345 #undef TARGET_STACK_PROTECT_FAIL
23346 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23348 #undef TARGET_FUNCTION_VALUE
23349 #define TARGET_FUNCTION_VALUE ix86_function_value
23351 struct gcc_target targetm = TARGET_INITIALIZER;
23353 #include "gt-i386.h"