1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
130 /* Processor costs (relative to an add) */
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 comopared to K8. Alignment becomes important after 8 bytes for mempcy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
608 struct processor_costs pentium4_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (3), /* cost of a lea instruction */
611 COSTS_N_INSNS (4), /* variable shift costs */
612 COSTS_N_INSNS (4), /* constant shift costs */
613 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (15), /* HI */
615 COSTS_N_INSNS (15), /* SI */
616 COSTS_N_INSNS (15), /* DI */
617 COSTS_N_INSNS (15)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (56), /* HI */
621 COSTS_N_INSNS (56), /* SI */
622 COSTS_N_INSNS (56), /* DI */
623 COSTS_N_INSNS (56)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 16, /* "large" insn */
628 2, /* cost for loading QImode using movzbl */
629 {4, 5, 4}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {2, 3, 2}, /* cost of storing integer registers */
633 2, /* cost of reg,reg fld/fst */
634 {2, 2, 6}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {4, 4, 6}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {2, 2}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {2, 2}, /* cost of storing MMX registers
642 in SImode and DImode */
643 12, /* cost of moving SSE register */
644 {12, 12, 12}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {2, 2, 8}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 10, /* MMX or SSE register to integer */
649 64, /* size of prefetch block */
650 6, /* number of parallel prefetches */
652 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
653 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
654 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
655 COSTS_N_INSNS (2), /* cost of FABS instruction. */
656 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
657 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
658 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
659 {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}},
660 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
661 {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}}
665 struct processor_costs nocona_cost = {
666 COSTS_N_INSNS (1), /* cost of an add instruction */
667 COSTS_N_INSNS (1), /* cost of a lea instruction */
668 COSTS_N_INSNS (1), /* variable shift costs */
669 COSTS_N_INSNS (1), /* constant shift costs */
670 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
671 COSTS_N_INSNS (10), /* HI */
672 COSTS_N_INSNS (10), /* SI */
673 COSTS_N_INSNS (10), /* DI */
674 COSTS_N_INSNS (10)}, /* other */
675 0, /* cost of multiply per each bit set */
676 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
677 COSTS_N_INSNS (66), /* HI */
678 COSTS_N_INSNS (66), /* SI */
679 COSTS_N_INSNS (66), /* DI */
680 COSTS_N_INSNS (66)}, /* other */
681 COSTS_N_INSNS (1), /* cost of movsx */
682 COSTS_N_INSNS (1), /* cost of movzx */
683 16, /* "large" insn */
685 4, /* cost for loading QImode using movzbl */
686 {4, 4, 4}, /* cost of loading integer registers
687 in QImode, HImode and SImode.
688 Relative to reg-reg move (2). */
689 {4, 4, 4}, /* cost of storing integer registers */
690 3, /* cost of reg,reg fld/fst */
691 {12, 12, 12}, /* cost of loading fp registers
692 in SFmode, DFmode and XFmode */
693 {4, 4, 4}, /* cost of storing fp registers
694 in SFmode, DFmode and XFmode */
695 6, /* cost of moving MMX register */
696 {12, 12}, /* cost of loading MMX registers
697 in SImode and DImode */
698 {12, 12}, /* cost of storing MMX registers
699 in SImode and DImode */
700 6, /* cost of moving SSE register */
701 {12, 12, 12}, /* cost of loading SSE registers
702 in SImode, DImode and TImode */
703 {12, 12, 12}, /* cost of storing SSE registers
704 in SImode, DImode and TImode */
705 8, /* MMX or SSE register to integer */
706 128, /* size of prefetch block */
707 8, /* number of parallel prefetches */
709 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
711 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
712 COSTS_N_INSNS (3), /* cost of FABS instruction. */
713 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
714 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
715 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
716 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
717 {100000, unrolled_loop}, {-1, libcall}}}},
718 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
719 {libcall, {{24, loop}, {64, unrolled_loop},
720 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
724 struct processor_costs core2_cost = {
725 COSTS_N_INSNS (1), /* cost of an add instruction */
726 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
727 COSTS_N_INSNS (1), /* variable shift costs */
728 COSTS_N_INSNS (1), /* constant shift costs */
729 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
730 COSTS_N_INSNS (3), /* HI */
731 COSTS_N_INSNS (3), /* SI */
732 COSTS_N_INSNS (3), /* DI */
733 COSTS_N_INSNS (3)}, /* other */
734 0, /* cost of multiply per each bit set */
735 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
736 COSTS_N_INSNS (22), /* HI */
737 COSTS_N_INSNS (22), /* SI */
738 COSTS_N_INSNS (22), /* DI */
739 COSTS_N_INSNS (22)}, /* other */
740 COSTS_N_INSNS (1), /* cost of movsx */
741 COSTS_N_INSNS (1), /* cost of movzx */
742 8, /* "large" insn */
744 2, /* cost for loading QImode using movzbl */
745 {6, 6, 6}, /* cost of loading integer registers
746 in QImode, HImode and SImode.
747 Relative to reg-reg move (2). */
748 {4, 4, 4}, /* cost of storing integer registers */
749 2, /* cost of reg,reg fld/fst */
750 {6, 6, 6}, /* cost of loading fp registers
751 in SFmode, DFmode and XFmode */
752 {4, 4, 4}, /* cost of loading integer registers */
753 2, /* cost of moving MMX register */
754 {6, 6}, /* cost of loading MMX registers
755 in SImode and DImode */
756 {4, 4}, /* cost of storing MMX registers
757 in SImode and DImode */
758 2, /* cost of moving SSE register */
759 {6, 6, 6}, /* cost of loading SSE registers
760 in SImode, DImode and TImode */
761 {4, 4, 4}, /* cost of storing SSE registers
762 in SImode, DImode and TImode */
763 2, /* MMX or SSE register to integer */
764 128, /* size of prefetch block */
765 8, /* number of parallel prefetches */
767 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (1), /* cost of FABS instruction. */
771 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
773 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
774 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
775 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
776 {{libcall, {{8, loop}, {15, unrolled_loop},
777 {2048, rep_prefix_4_byte}, {-1, libcall}}},
778 {libcall, {{24, loop}, {32, unrolled_loop},
779 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
782 /* Generic64 should produce code tuned for Nocona and K8. */
784 struct processor_costs generic64_cost = {
785 COSTS_N_INSNS (1), /* cost of an add instruction */
786 /* On all chips taken into consideration lea is 2 cycles and more. With
787 this cost however our current implementation of synth_mult results in
788 use of unnecessary temporary registers causing regression on several
789 SPECfp benchmarks. */
790 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
791 COSTS_N_INSNS (1), /* variable shift costs */
792 COSTS_N_INSNS (1), /* constant shift costs */
793 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
794 COSTS_N_INSNS (4), /* HI */
795 COSTS_N_INSNS (3), /* SI */
796 COSTS_N_INSNS (4), /* DI */
797 COSTS_N_INSNS (2)}, /* other */
798 0, /* cost of multiply per each bit set */
799 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
800 COSTS_N_INSNS (26), /* HI */
801 COSTS_N_INSNS (42), /* SI */
802 COSTS_N_INSNS (74), /* DI */
803 COSTS_N_INSNS (74)}, /* other */
804 COSTS_N_INSNS (1), /* cost of movsx */
805 COSTS_N_INSNS (1), /* cost of movzx */
806 8, /* "large" insn */
808 4, /* cost for loading QImode using movzbl */
809 {4, 4, 4}, /* cost of loading integer registers
810 in QImode, HImode and SImode.
811 Relative to reg-reg move (2). */
812 {4, 4, 4}, /* cost of storing integer registers */
813 4, /* cost of reg,reg fld/fst */
814 {12, 12, 12}, /* cost of loading fp registers
815 in SFmode, DFmode and XFmode */
816 {6, 6, 8}, /* cost of storing fp registers
817 in SFmode, DFmode and XFmode */
818 2, /* cost of moving MMX register */
819 {8, 8}, /* cost of loading MMX registers
820 in SImode and DImode */
821 {8, 8}, /* cost of storing MMX registers
822 in SImode and DImode */
823 2, /* cost of moving SSE register */
824 {8, 8, 8}, /* cost of loading SSE registers
825 in SImode, DImode and TImode */
826 {8, 8, 8}, /* cost of storing SSE registers
827 in SImode, DImode and TImode */
828 5, /* MMX or SSE register to integer */
829 64, /* size of prefetch block */
830 6, /* number of parallel prefetches */
831 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
832 is increased to perhaps more appropriate value of 5. */
834 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
835 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
836 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
837 COSTS_N_INSNS (8), /* cost of FABS instruction. */
838 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
839 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
840 {DUMMY_STRINGOP_ALGS,
841 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
842 {DUMMY_STRINGOP_ALGS,
843 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
846 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
848 struct processor_costs generic32_cost = {
849 COSTS_N_INSNS (1), /* cost of an add instruction */
850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
851 COSTS_N_INSNS (1), /* variable shift costs */
852 COSTS_N_INSNS (1), /* constant shift costs */
853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
854 COSTS_N_INSNS (4), /* HI */
855 COSTS_N_INSNS (3), /* SI */
856 COSTS_N_INSNS (4), /* DI */
857 COSTS_N_INSNS (2)}, /* other */
858 0, /* cost of multiply per each bit set */
859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
860 COSTS_N_INSNS (26), /* HI */
861 COSTS_N_INSNS (42), /* SI */
862 COSTS_N_INSNS (74), /* DI */
863 COSTS_N_INSNS (74)}, /* other */
864 COSTS_N_INSNS (1), /* cost of movsx */
865 COSTS_N_INSNS (1), /* cost of movzx */
866 8, /* "large" insn */
868 4, /* cost for loading QImode using movzbl */
869 {4, 4, 4}, /* cost of loading integer registers
870 in QImode, HImode and SImode.
871 Relative to reg-reg move (2). */
872 {4, 4, 4}, /* cost of storing integer registers */
873 4, /* cost of reg,reg fld/fst */
874 {12, 12, 12}, /* cost of loading fp registers
875 in SFmode, DFmode and XFmode */
876 {6, 6, 8}, /* cost of storing fp registers
877 in SFmode, DFmode and XFmode */
878 2, /* cost of moving MMX register */
879 {8, 8}, /* cost of loading MMX registers
880 in SImode and DImode */
881 {8, 8}, /* cost of storing MMX registers
882 in SImode and DImode */
883 2, /* cost of moving SSE register */
884 {8, 8, 8}, /* cost of loading SSE registers
885 in SImode, DImode and TImode */
886 {8, 8, 8}, /* cost of storing SSE registers
887 in SImode, DImode and TImode */
888 5, /* MMX or SSE register to integer */
889 64, /* size of prefetch block */
890 6, /* number of parallel prefetches */
892 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
893 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
894 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
895 COSTS_N_INSNS (8), /* cost of FABS instruction. */
896 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
897 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
898 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
899 DUMMY_STRINGOP_ALGS},
900 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
901 DUMMY_STRINGOP_ALGS},
904 const struct processor_costs *ix86_cost = &pentium_cost;
906 /* Processor feature/optimization bitmasks. */
907 #define m_386 (1<<PROCESSOR_I386)
908 #define m_486 (1<<PROCESSOR_I486)
909 #define m_PENT (1<<PROCESSOR_PENTIUM)
910 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
911 #define m_GEODE (1<<PROCESSOR_GEODE)
912 #define m_K6_GEODE (m_K6 | m_GEODE)
913 #define m_K6 (1<<PROCESSOR_K6)
914 #define m_ATHLON (1<<PROCESSOR_ATHLON)
915 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
916 #define m_K8 (1<<PROCESSOR_K8)
917 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
918 #define m_NOCONA (1<<PROCESSOR_NOCONA)
919 #define m_CORE2 (1<<PROCESSOR_CORE2)
920 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
921 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
922 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
924 /* Generic instruction choice should be common subset of supported CPUs
925 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
927 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
928 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
929 generic because it is not working well with PPro base chips. */
930 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
931 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
932 const int x86_zero_extend_with_and = m_486 | m_PENT;
933 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
934 const int x86_double_with_add = ~m_386;
935 const int x86_use_bit_test = m_386;
936 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
937 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
938 const int x86_3dnow_a = m_ATHLON_K8;
939 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
940 /* Branch hints were put in P4 based on simulation result. But
941 after P4 was made, no performance benefit was observed with
942 branch hints. It also increases the code size. As the result,
943 icc never generates branch hints. */
944 const int x86_branch_hints = 0;
945 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
946 /* We probably ought to watch for partial register stalls on Generic32
947 compilation setting as well. However in current implementation the
948 partial register stalls are not eliminated very well - they can
949 be introduced via subregs synthesized by combine and can happen
950 in caller/callee saving sequences.
951 Because this option pays back little on PPro based chips and is in conflict
952 with partial reg. dependencies used by Athlon/P4 based chips, it is better
953 to leave it off for generic32 for now. */
954 const int x86_partial_reg_stall = m_PPRO;
955 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
956 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
957 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
958 const int x86_use_mov0 = m_K6;
959 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
960 const int x86_read_modify_write = ~m_PENT;
961 const int x86_read_modify = ~(m_PENT | m_PPRO);
962 const int x86_split_long_moves = m_PPRO;
963 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
964 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
965 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
966 const int x86_qimode_math = ~(0);
967 const int x86_promote_qi_regs = 0;
968 /* On PPro this flag is meant to avoid partial register stalls. Just like
969 the x86_partial_reg_stall this option might be considered for Generic32
970 if our scheme for avoiding partial stalls was more effective. */
971 const int x86_himode_math = ~(m_PPRO);
972 const int x86_promote_hi_regs = m_PPRO;
973 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
974 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
975 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
976 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
977 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
978 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
980 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
981 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
982 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
983 const int x86_shift1 = ~m_486;
984 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
985 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
986 that thread 128bit SSE registers as single units versus K8 based chips that
987 divide SSE registers to two 64bit halves.
988 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
989 to allow register renaming on 128bit SSE units, but usually results in one
990 extra microop on 64bit SSE units. Experimental results shows that disabling
991 this option on P4 brings over 20% SPECfp regression, while enabling it on
992 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
994 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
995 /* Set for machines where the type and dependencies are resolved on SSE
996 register parts instead of whole registers, so we may maintain just
997 lower part of scalar values in proper format leaving the upper part
999 const int x86_sse_split_regs = m_ATHLON_K8;
1000 const int x86_sse_typeless_stores = m_ATHLON_K8;
1001 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1002 const int x86_use_ffreep = m_ATHLON_K8;
1003 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1005 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
1006 integer data in xmm registers. Which results in pretty abysmal code. */
1007 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1009 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1010 /* Some CPU cores are not able to predict more than 4 branch instructions in
1011 the 16 byte window. */
1012 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1013 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
1014 const int x86_use_bt = m_ATHLON_K8;
1015 /* Compare and exchange was added for 80486. */
1016 const int x86_cmpxchg = ~m_386;
1017 /* Compare and exchange 8 bytes was added for pentium. */
1018 const int x86_cmpxchg8b = ~(m_386 | m_486);
1019 /* Compare and exchange 16 bytes was added for nocona. */
1020 const int x86_cmpxchg16b = m_NOCONA;
1021 /* Exchange and add was added for 80486. */
1022 const int x86_xadd = ~m_386;
1023 /* Byteswap was added for 80486. */
1024 const int x86_bswap = ~m_386;
1025 const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
1027 static enum stringop_alg stringop_alg = no_stringop;
1029 /* In case the average insn count for single function invocation is
1030 lower than this constant, emit fast (but longer) prologue and
1032 #define FAST_PROLOGUE_INSN_COUNT 20
1034 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1035 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1036 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1037 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1039 /* Array of the smallest class containing reg number REGNO, indexed by
1040 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1042 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1044 /* ax, dx, cx, bx */
1045 AREG, DREG, CREG, BREG,
1046 /* si, di, bp, sp */
1047 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1049 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1050 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1053 /* flags, fpsr, fpcr, dirflag, frame */
1054 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1055 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1057 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1059 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1060 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1061 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1065 /* The "default" register map used in 32bit mode. */
1067 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1069 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1070 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1071 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1072 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1073 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1074 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1075 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1078 static int const x86_64_int_parameter_registers[6] =
1080 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1081 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1084 static int const x86_64_int_return_registers[4] =
1086 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
1089 /* The "default" register map used in 64bit mode. */
1090 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1092 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1093 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1094 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1095 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1096 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1097 8,9,10,11,12,13,14,15, /* extended integer registers */
1098 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1101 /* Define the register numbers to be used in Dwarf debugging information.
1102 The SVR4 reference port C compiler uses the following register numbers
1103 in its Dwarf output code:
1104 0 for %eax (gcc regno = 0)
1105 1 for %ecx (gcc regno = 2)
1106 2 for %edx (gcc regno = 1)
1107 3 for %ebx (gcc regno = 3)
1108 4 for %esp (gcc regno = 7)
1109 5 for %ebp (gcc regno = 6)
1110 6 for %esi (gcc regno = 4)
1111 7 for %edi (gcc regno = 5)
1112 The following three DWARF register numbers are never generated by
1113 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1114 believes these numbers have these meanings.
1115 8 for %eip (no gcc equivalent)
1116 9 for %eflags (gcc regno = 17)
1117 10 for %trapno (no gcc equivalent)
1118 It is not at all clear how we should number the FP stack registers
1119 for the x86 architecture. If the version of SDB on x86/svr4 were
1120 a bit less brain dead with respect to floating-point then we would
1121 have a precedent to follow with respect to DWARF register numbers
1122 for x86 FP registers, but the SDB on x86/svr4 is so completely
1123 broken with respect to FP registers that it is hardly worth thinking
1124 of it as something to strive for compatibility with.
1125 The version of x86/svr4 SDB I have at the moment does (partially)
1126 seem to believe that DWARF register number 11 is associated with
1127 the x86 register %st(0), but that's about all. Higher DWARF
1128 register numbers don't seem to be associated with anything in
1129 particular, and even for DWARF regno 11, SDB only seems to under-
1130 stand that it should say that a variable lives in %st(0) (when
1131 asked via an `=' command) if we said it was in DWARF regno 11,
1132 but SDB still prints garbage when asked for the value of the
1133 variable in question (via a `/' command).
1134 (Also note that the labels SDB prints for various FP stack regs
1135 when doing an `x' command are all wrong.)
1136 Note that these problems generally don't affect the native SVR4
1137 C compiler because it doesn't allow the use of -O with -g and
1138 because when it is *not* optimizing, it allocates a memory
1139 location for each floating-point variable, and the memory
1140 location is what gets described in the DWARF AT_location
1141 attribute for the variable in question.
1142 Regardless of the severe mental illness of the x86/svr4 SDB, we
1143 do something sensible here and we use the following DWARF
1144 register numbers. Note that these are all stack-top-relative
1146 11 for %st(0) (gcc regno = 8)
1147 12 for %st(1) (gcc regno = 9)
1148 13 for %st(2) (gcc regno = 10)
1149 14 for %st(3) (gcc regno = 11)
1150 15 for %st(4) (gcc regno = 12)
1151 16 for %st(5) (gcc regno = 13)
1152 17 for %st(6) (gcc regno = 14)
1153 18 for %st(7) (gcc regno = 15)
1155 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1157 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1158 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1159 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1160 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1161 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1162 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1163 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1166 /* Test and compare insns in i386.md store the information needed to
1167 generate branch and scc insns here. */
1169 rtx ix86_compare_op0 = NULL_RTX;
1170 rtx ix86_compare_op1 = NULL_RTX;
1171 rtx ix86_compare_emitted = NULL_RTX;
1173 /* Size of the register save area. */
1174 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1176 /* Define the structure for the machine field in struct function. */
1178 struct stack_local_entry GTY(())
1180 unsigned short mode;
1183 struct stack_local_entry *next;
1186 /* Structure describing stack frame layout.
1187 Stack grows downward:
1193 saved frame pointer if frame_pointer_needed
1194 <- HARD_FRAME_POINTER
1199 [va_arg registers] (
1200 > to_allocate <- FRAME_POINTER
1210 HOST_WIDE_INT frame;
1212 int outgoing_arguments_size;
1215 HOST_WIDE_INT to_allocate;
1216 /* The offsets relative to ARG_POINTER. */
1217 HOST_WIDE_INT frame_pointer_offset;
1218 HOST_WIDE_INT hard_frame_pointer_offset;
1219 HOST_WIDE_INT stack_pointer_offset;
1221 /* When save_regs_using_mov is set, emit prologue using
1222 move instead of push instructions. */
1223 bool save_regs_using_mov;
1226 /* Code model option. */
1227 enum cmodel ix86_cmodel;
1229 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1231 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1233 /* Which unit we are generating floating point math for. */
1234 enum fpmath_unit ix86_fpmath;
1236 /* Which cpu are we scheduling for. */
1237 enum processor_type ix86_tune;
1238 /* Which instruction set architecture to use. */
1239 enum processor_type ix86_arch;
1241 /* true if sse prefetch instruction is not NOOP. */
1242 int x86_prefetch_sse;
1244 /* ix86_regparm_string as a number */
1245 static int ix86_regparm;
1247 /* -mstackrealign option */
1248 extern int ix86_force_align_arg_pointer;
1249 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1251 /* Preferred alignment for stack boundary in bits. */
1252 unsigned int ix86_preferred_stack_boundary;
1254 /* Values 1-5: see jump.c */
1255 int ix86_branch_cost;
1257 /* Variables which are this size or smaller are put in the data/bss
1258 or ldata/lbss sections. */
1260 int ix86_section_threshold = 65536;
1262 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1263 char internal_label_prefix[16];
1264 int internal_label_prefix_len;
1266 static bool ix86_handle_option (size_t, const char *, int);
1267 static void output_pic_addr_const (FILE *, rtx, int);
1268 static void put_condition_code (enum rtx_code, enum machine_mode,
1270 static const char *get_some_local_dynamic_name (void);
1271 static int get_some_local_dynamic_name_1 (rtx *, void *);
1272 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1273 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1275 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1276 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1278 static rtx get_thread_pointer (int);
1279 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1280 static void get_pc_thunk_name (char [32], unsigned int);
1281 static rtx gen_push (rtx);
1282 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1283 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1284 static struct machine_function * ix86_init_machine_status (void);
1285 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1286 static int ix86_nsaved_regs (void);
1287 static void ix86_emit_save_regs (void);
1288 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1289 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1290 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1291 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1292 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1293 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1294 static int ix86_issue_rate (void);
1295 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1296 static int ia32_multipass_dfa_lookahead (void);
1297 static void ix86_init_mmx_sse_builtins (void);
1298 static rtx x86_this_parameter (tree);
1299 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1300 HOST_WIDE_INT, tree);
1301 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1302 static void x86_file_start (void);
1303 static void ix86_reorg (void);
1304 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1305 static tree ix86_build_builtin_va_list (void);
1306 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1308 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1309 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1310 static bool ix86_vector_mode_supported_p (enum machine_mode);
1312 static int ix86_address_cost (rtx);
1313 static bool ix86_cannot_force_const_mem (rtx);
1314 static rtx ix86_delegitimize_address (rtx);
1316 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1318 struct builtin_description;
1319 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1321 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1323 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1324 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1325 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1326 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1327 static rtx safe_vector_operand (rtx, enum machine_mode);
1328 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1329 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1330 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1331 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1332 static int ix86_fp_comparison_cost (enum rtx_code code);
1333 static unsigned int ix86_select_alt_pic_regnum (void);
1334 static int ix86_save_reg (unsigned int, int);
1335 static void ix86_compute_frame_layout (struct ix86_frame *);
1336 static int ix86_comp_type_attributes (tree, tree);
1337 static int ix86_function_regparm (tree, tree);
1338 const struct attribute_spec ix86_attribute_table[];
1339 static bool ix86_function_ok_for_sibcall (tree, tree);
1340 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1341 static int ix86_value_regno (enum machine_mode, tree, tree);
1342 static bool contains_128bit_aligned_vector_p (tree);
1343 static rtx ix86_struct_value_rtx (tree, int);
1344 static bool ix86_ms_bitfield_layout_p (tree);
1345 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1346 static int extended_reg_mentioned_1 (rtx *, void *);
1347 static bool ix86_rtx_costs (rtx, int, int, int *);
1348 static int min_insn_size (rtx);
1349 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1350 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1351 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1353 static void ix86_init_builtins (void);
1354 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1355 static const char *ix86_mangle_fundamental_type (tree);
1356 static tree ix86_stack_protect_fail (void);
1357 static rtx ix86_internal_arg_pointer (void);
1358 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1360 /* This function is only used on Solaris. */
1361 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1364 /* Register class used for passing given 64bit part of the argument.
1365 These represent classes as documented by the PS ABI, with the exception
1366 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1367 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1369 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1370 whenever possible (upper half does contain padding).
1372 enum x86_64_reg_class
1375 X86_64_INTEGER_CLASS,
1376 X86_64_INTEGERSI_CLASS,
1383 X86_64_COMPLEX_X87_CLASS,
1386 static const char * const x86_64_reg_class_name[] = {
1387 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1388 "sseup", "x87", "x87up", "cplx87", "no"
1391 #define MAX_CLASSES 4
1393 /* Table of constants used by fldpi, fldln2, etc.... */
1394 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1395 static bool ext_80387_constants_init = 0;
1396 static void init_ext_80387_constants (void);
1397 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1398 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1399 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1400 static section *x86_64_elf_select_section (tree decl, int reloc,
1401 unsigned HOST_WIDE_INT align)
1404 /* Initialize the GCC target structure. */
1405 #undef TARGET_ATTRIBUTE_TABLE
1406 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1407 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1408 # undef TARGET_MERGE_DECL_ATTRIBUTES
1409 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1412 #undef TARGET_COMP_TYPE_ATTRIBUTES
1413 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1415 #undef TARGET_INIT_BUILTINS
1416 #define TARGET_INIT_BUILTINS ix86_init_builtins
1417 #undef TARGET_EXPAND_BUILTIN
1418 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1420 #undef TARGET_ASM_FUNCTION_EPILOGUE
1421 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1423 #undef TARGET_ENCODE_SECTION_INFO
1424 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1425 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1427 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1430 #undef TARGET_ASM_OPEN_PAREN
1431 #define TARGET_ASM_OPEN_PAREN ""
1432 #undef TARGET_ASM_CLOSE_PAREN
1433 #define TARGET_ASM_CLOSE_PAREN ""
1435 #undef TARGET_ASM_ALIGNED_HI_OP
1436 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1437 #undef TARGET_ASM_ALIGNED_SI_OP
1438 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1440 #undef TARGET_ASM_ALIGNED_DI_OP
1441 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1444 #undef TARGET_ASM_UNALIGNED_HI_OP
1445 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1446 #undef TARGET_ASM_UNALIGNED_SI_OP
1447 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1448 #undef TARGET_ASM_UNALIGNED_DI_OP
1449 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1451 #undef TARGET_SCHED_ADJUST_COST
1452 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1453 #undef TARGET_SCHED_ISSUE_RATE
1454 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1455 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1456 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1457 ia32_multipass_dfa_lookahead
1459 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1460 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1463 #undef TARGET_HAVE_TLS
1464 #define TARGET_HAVE_TLS true
1466 #undef TARGET_CANNOT_FORCE_CONST_MEM
1467 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1468 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1469 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1471 #undef TARGET_DELEGITIMIZE_ADDRESS
1472 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1474 #undef TARGET_MS_BITFIELD_LAYOUT_P
1475 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1478 #undef TARGET_BINDS_LOCAL_P
1479 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1482 #undef TARGET_ASM_OUTPUT_MI_THUNK
1483 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1484 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1485 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1487 #undef TARGET_ASM_FILE_START
1488 #define TARGET_ASM_FILE_START x86_file_start
1490 #undef TARGET_DEFAULT_TARGET_FLAGS
1491 #define TARGET_DEFAULT_TARGET_FLAGS \
1493 | TARGET_64BIT_DEFAULT \
1494 | TARGET_SUBTARGET_DEFAULT \
1495 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1497 #undef TARGET_HANDLE_OPTION
1498 #define TARGET_HANDLE_OPTION ix86_handle_option
1500 #undef TARGET_RTX_COSTS
1501 #define TARGET_RTX_COSTS ix86_rtx_costs
1502 #undef TARGET_ADDRESS_COST
1503 #define TARGET_ADDRESS_COST ix86_address_cost
1505 #undef TARGET_FIXED_CONDITION_CODE_REGS
1506 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1507 #undef TARGET_CC_MODES_COMPATIBLE
1508 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1510 #undef TARGET_MACHINE_DEPENDENT_REORG
1511 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1513 #undef TARGET_BUILD_BUILTIN_VA_LIST
1514 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1516 #undef TARGET_MD_ASM_CLOBBERS
1517 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1519 #undef TARGET_PROMOTE_PROTOTYPES
1520 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1521 #undef TARGET_STRUCT_VALUE_RTX
1522 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1523 #undef TARGET_SETUP_INCOMING_VARARGS
1524 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1525 #undef TARGET_MUST_PASS_IN_STACK
1526 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1527 #undef TARGET_PASS_BY_REFERENCE
1528 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1529 #undef TARGET_INTERNAL_ARG_POINTER
1530 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1531 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1532 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1534 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1535 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1537 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1538 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1540 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1541 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1544 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1545 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1548 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1549 #undef TARGET_INSERT_ATTRIBUTES
1550 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1553 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1554 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1556 #undef TARGET_STACK_PROTECT_FAIL
1557 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1559 #undef TARGET_FUNCTION_VALUE
1560 #define TARGET_FUNCTION_VALUE ix86_function_value
1562 struct gcc_target targetm = TARGET_INITIALIZER;
1565 /* The svr4 ABI for the i386 says that records and unions are returned
1567 #ifndef DEFAULT_PCC_STRUCT_RETURN
1568 #define DEFAULT_PCC_STRUCT_RETURN 1
1571 /* Implement TARGET_HANDLE_OPTION. */
1574 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1581 target_flags &= ~MASK_3DNOW_A;
1582 target_flags_explicit |= MASK_3DNOW_A;
1589 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1590 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1597 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1598 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1605 target_flags &= ~MASK_SSE3;
1606 target_flags_explicit |= MASK_SSE3;
1615 /* Sometimes certain combinations of command options do not make
1616 sense on a particular target machine. You can define a macro
1617 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1618 defined, is executed once just after all the command options have
1621 Don't use this macro to turn on various extra optimizations for
1622 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1625 override_options (void)
1628 int ix86_tune_defaulted = 0;
1630 /* Comes from final.c -- no real reason to change it. */
1631 #define MAX_CODE_ALIGN 16
1635 const struct processor_costs *cost; /* Processor costs */
1636 const int target_enable; /* Target flags to enable. */
1637 const int target_disable; /* Target flags to disable. */
1638 const int align_loop; /* Default alignments. */
1639 const int align_loop_max_skip;
1640 const int align_jump;
1641 const int align_jump_max_skip;
1642 const int align_func;
1644 const processor_target_table[PROCESSOR_max] =
1646 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1647 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1648 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1649 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1650 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1651 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1652 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1653 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1654 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1655 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1656 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1657 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1658 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1661 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1664 const char *const name; /* processor name or nickname. */
1665 const enum processor_type processor;
1666 const enum pta_flags
1672 PTA_PREFETCH_SSE = 16,
1679 const processor_alias_table[] =
1681 {"i386", PROCESSOR_I386, 0},
1682 {"i486", PROCESSOR_I486, 0},
1683 {"i586", PROCESSOR_PENTIUM, 0},
1684 {"pentium", PROCESSOR_PENTIUM, 0},
1685 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1686 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1687 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1688 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1689 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1690 {"i686", PROCESSOR_PENTIUMPRO, 0},
1691 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1692 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1693 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1694 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1695 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1696 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1697 | PTA_MMX | PTA_PREFETCH_SSE},
1698 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1699 | PTA_MMX | PTA_PREFETCH_SSE},
1700 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1701 | PTA_MMX | PTA_PREFETCH_SSE},
1702 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1703 | PTA_MMX | PTA_PREFETCH_SSE},
1704 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
1705 | PTA_64BIT | PTA_MMX
1706 | PTA_PREFETCH_SSE},
1707 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1709 {"k6", PROCESSOR_K6, PTA_MMX},
1710 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1711 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1712 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1714 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1715 | PTA_3DNOW | PTA_3DNOW_A},
1716 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1717 | PTA_3DNOW_A | PTA_SSE},
1718 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1719 | PTA_3DNOW_A | PTA_SSE},
1720 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1721 | PTA_3DNOW_A | PTA_SSE},
1722 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1723 | PTA_SSE | PTA_SSE2 },
1724 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1725 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1726 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1727 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1728 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1729 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1730 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1731 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1732 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1733 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1736 int const pta_size = ARRAY_SIZE (processor_alias_table);
1738 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1739 SUBTARGET_OVERRIDE_OPTIONS;
1742 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1743 SUBSUBTARGET_OVERRIDE_OPTIONS;
1746 /* -fPIC is the default for x86_64. */
1747 if (TARGET_MACHO && TARGET_64BIT)
1750 /* Set the default values for switches whose default depends on TARGET_64BIT
1751 in case they weren't overwritten by command line options. */
1754 /* Mach-O doesn't support omitting the frame pointer for now. */
1755 if (flag_omit_frame_pointer == 2)
1756 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1757 if (flag_asynchronous_unwind_tables == 2)
1758 flag_asynchronous_unwind_tables = 1;
1759 if (flag_pcc_struct_return == 2)
1760 flag_pcc_struct_return = 0;
1764 if (flag_omit_frame_pointer == 2)
1765 flag_omit_frame_pointer = 0;
1766 if (flag_asynchronous_unwind_tables == 2)
1767 flag_asynchronous_unwind_tables = 0;
1768 if (flag_pcc_struct_return == 2)
1769 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1772 /* Need to check -mtune=generic first. */
1773 if (ix86_tune_string)
1775 if (!strcmp (ix86_tune_string, "generic")
1776 || !strcmp (ix86_tune_string, "i686")
1777 /* As special support for cross compilers we read -mtune=native
1778 as -mtune=generic. With native compilers we won't see the
1779 -mtune=native, as it was changed by the driver. */
1780 || !strcmp (ix86_tune_string, "native"))
1783 ix86_tune_string = "generic64";
1785 ix86_tune_string = "generic32";
1787 else if (!strncmp (ix86_tune_string, "generic", 7))
1788 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1792 if (ix86_arch_string)
1793 ix86_tune_string = ix86_arch_string;
1794 if (!ix86_tune_string)
1796 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1797 ix86_tune_defaulted = 1;
1800 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1801 need to use a sensible tune option. */
1802 if (!strcmp (ix86_tune_string, "generic")
1803 || !strcmp (ix86_tune_string, "x86-64")
1804 || !strcmp (ix86_tune_string, "i686"))
1807 ix86_tune_string = "generic64";
1809 ix86_tune_string = "generic32";
1812 if (ix86_stringop_string)
1814 if (!strcmp (ix86_stringop_string, "rep_byte"))
1815 stringop_alg = rep_prefix_1_byte;
1816 else if (!strcmp (ix86_stringop_string, "libcall"))
1817 stringop_alg = libcall;
1818 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1819 stringop_alg = rep_prefix_4_byte;
1820 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1821 stringop_alg = rep_prefix_8_byte;
1822 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1823 stringop_alg = loop_1_byte;
1824 else if (!strcmp (ix86_stringop_string, "loop"))
1825 stringop_alg = loop;
1826 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1827 stringop_alg = unrolled_loop;
1829 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1831 if (!strcmp (ix86_tune_string, "x86-64"))
1832 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1833 "-mtune=generic instead as appropriate.");
1835 if (!ix86_arch_string)
1836 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1837 if (!strcmp (ix86_arch_string, "generic"))
1838 error ("generic CPU can be used only for -mtune= switch");
1839 if (!strncmp (ix86_arch_string, "generic", 7))
1840 error ("bad value (%s) for -march= switch", ix86_arch_string);
1842 if (ix86_cmodel_string != 0)
1844 if (!strcmp (ix86_cmodel_string, "small"))
1845 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1846 else if (!strcmp (ix86_cmodel_string, "medium"))
1847 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1849 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1850 else if (!strcmp (ix86_cmodel_string, "32"))
1851 ix86_cmodel = CM_32;
1852 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1853 ix86_cmodel = CM_KERNEL;
1854 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1855 ix86_cmodel = CM_LARGE;
1857 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1861 ix86_cmodel = CM_32;
1863 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1865 if (ix86_asm_string != 0)
1868 && !strcmp (ix86_asm_string, "intel"))
1869 ix86_asm_dialect = ASM_INTEL;
1870 else if (!strcmp (ix86_asm_string, "att"))
1871 ix86_asm_dialect = ASM_ATT;
1873 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1875 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1876 error ("code model %qs not supported in the %s bit mode",
1877 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1878 if (ix86_cmodel == CM_LARGE)
1879 sorry ("code model %<large%> not supported yet");
1880 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1881 sorry ("%i-bit mode not compiled in",
1882 (target_flags & MASK_64BIT) ? 64 : 32);
1884 for (i = 0; i < pta_size; i++)
1885 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1887 ix86_arch = processor_alias_table[i].processor;
1888 /* Default cpu tuning to the architecture. */
1889 ix86_tune = ix86_arch;
1890 if (processor_alias_table[i].flags & PTA_MMX
1891 && !(target_flags_explicit & MASK_MMX))
1892 target_flags |= MASK_MMX;
1893 if (processor_alias_table[i].flags & PTA_3DNOW
1894 && !(target_flags_explicit & MASK_3DNOW))
1895 target_flags |= MASK_3DNOW;
1896 if (processor_alias_table[i].flags & PTA_3DNOW_A
1897 && !(target_flags_explicit & MASK_3DNOW_A))
1898 target_flags |= MASK_3DNOW_A;
1899 if (processor_alias_table[i].flags & PTA_SSE
1900 && !(target_flags_explicit & MASK_SSE))
1901 target_flags |= MASK_SSE;
1902 if (processor_alias_table[i].flags & PTA_SSE2
1903 && !(target_flags_explicit & MASK_SSE2))
1904 target_flags |= MASK_SSE2;
1905 if (processor_alias_table[i].flags & PTA_SSE3
1906 && !(target_flags_explicit & MASK_SSE3))
1907 target_flags |= MASK_SSE3;
1908 if (processor_alias_table[i].flags & PTA_SSSE3
1909 && !(target_flags_explicit & MASK_SSSE3))
1910 target_flags |= MASK_SSSE3;
1911 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1912 x86_prefetch_sse = true;
1913 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1914 error ("CPU you selected does not support x86-64 "
1920 error ("bad value (%s) for -march= switch", ix86_arch_string);
1922 for (i = 0; i < pta_size; i++)
1923 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1925 ix86_tune = processor_alias_table[i].processor;
1926 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1928 if (ix86_tune_defaulted)
1930 ix86_tune_string = "x86-64";
1931 for (i = 0; i < pta_size; i++)
1932 if (! strcmp (ix86_tune_string,
1933 processor_alias_table[i].name))
1935 ix86_tune = processor_alias_table[i].processor;
1938 error ("CPU you selected does not support x86-64 "
1941 /* Intel CPUs have always interpreted SSE prefetch instructions as
1942 NOPs; so, we can enable SSE prefetch instructions even when
1943 -mtune (rather than -march) points us to a processor that has them.
1944 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1945 higher processors. */
1946 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1947 x86_prefetch_sse = true;
1951 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1954 ix86_cost = &size_cost;
1956 ix86_cost = processor_target_table[ix86_tune].cost;
1957 target_flags |= processor_target_table[ix86_tune].target_enable;
1958 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1960 /* Arrange to set up i386_stack_locals for all functions. */
1961 init_machine_status = ix86_init_machine_status;
1963 /* Validate -mregparm= value. */
1964 if (ix86_regparm_string)
1966 i = atoi (ix86_regparm_string);
1967 if (i < 0 || i > REGPARM_MAX)
1968 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1974 ix86_regparm = REGPARM_MAX;
1976 /* If the user has provided any of the -malign-* options,
1977 warn and use that value only if -falign-* is not set.
1978 Remove this code in GCC 3.2 or later. */
1979 if (ix86_align_loops_string)
1981 warning (0, "-malign-loops is obsolete, use -falign-loops");
1982 if (align_loops == 0)
1984 i = atoi (ix86_align_loops_string);
1985 if (i < 0 || i > MAX_CODE_ALIGN)
1986 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1988 align_loops = 1 << i;
1992 if (ix86_align_jumps_string)
1994 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1995 if (align_jumps == 0)
1997 i = atoi (ix86_align_jumps_string);
1998 if (i < 0 || i > MAX_CODE_ALIGN)
1999 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2001 align_jumps = 1 << i;
2005 if (ix86_align_funcs_string)
2007 warning (0, "-malign-functions is obsolete, use -falign-functions");
2008 if (align_functions == 0)
2010 i = atoi (ix86_align_funcs_string);
2011 if (i < 0 || i > MAX_CODE_ALIGN)
2012 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2014 align_functions = 1 << i;
2018 /* Default align_* from the processor table. */
2019 if (align_loops == 0)
2021 align_loops = processor_target_table[ix86_tune].align_loop;
2022 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2024 if (align_jumps == 0)
2026 align_jumps = processor_target_table[ix86_tune].align_jump;
2027 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2029 if (align_functions == 0)
2031 align_functions = processor_target_table[ix86_tune].align_func;
2034 /* Validate -mbranch-cost= value, or provide default. */
2035 ix86_branch_cost = ix86_cost->branch_cost;
2036 if (ix86_branch_cost_string)
2038 i = atoi (ix86_branch_cost_string);
2040 error ("-mbranch-cost=%d is not between 0 and 5", i);
2042 ix86_branch_cost = i;
2044 if (ix86_section_threshold_string)
2046 i = atoi (ix86_section_threshold_string);
2048 error ("-mlarge-data-threshold=%d is negative", i);
2050 ix86_section_threshold = i;
2053 if (ix86_tls_dialect_string)
2055 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2056 ix86_tls_dialect = TLS_DIALECT_GNU;
2057 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2058 ix86_tls_dialect = TLS_DIALECT_GNU2;
2059 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2060 ix86_tls_dialect = TLS_DIALECT_SUN;
2062 error ("bad value (%s) for -mtls-dialect= switch",
2063 ix86_tls_dialect_string);
2066 /* Keep nonleaf frame pointers. */
2067 if (flag_omit_frame_pointer)
2068 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2069 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2070 flag_omit_frame_pointer = 1;
2072 /* If we're doing fast math, we don't care about comparison order
2073 wrt NaNs. This lets us use a shorter comparison sequence. */
2074 if (flag_finite_math_only)
2075 target_flags &= ~MASK_IEEE_FP;
2077 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2078 since the insns won't need emulation. */
2079 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2080 target_flags &= ~MASK_NO_FANCY_MATH_387;
2082 /* Likewise, if the target doesn't have a 387, or we've specified
2083 software floating point, don't use 387 inline intrinsics. */
2085 target_flags |= MASK_NO_FANCY_MATH_387;
2087 /* Turn on SSE3 builtins for -mssse3. */
2089 target_flags |= MASK_SSE3;
2091 /* Turn on SSE2 builtins for -msse3. */
2093 target_flags |= MASK_SSE2;
2095 /* Turn on SSE builtins for -msse2. */
2097 target_flags |= MASK_SSE;
2099 /* Turn on MMX builtins for -msse. */
2102 target_flags |= MASK_MMX & ~target_flags_explicit;
2103 x86_prefetch_sse = true;
2106 /* Turn on MMX builtins for 3Dnow. */
2108 target_flags |= MASK_MMX;
2112 if (TARGET_ALIGN_DOUBLE)
2113 error ("-malign-double makes no sense in the 64bit mode");
2115 error ("-mrtd calling convention not supported in the 64bit mode");
2117 /* Enable by default the SSE and MMX builtins. Do allow the user to
2118 explicitly disable any of these. In particular, disabling SSE and
2119 MMX for kernel code is extremely useful. */
2121 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2122 & ~target_flags_explicit);
2126 /* i386 ABI does not specify red zone. It still makes sense to use it
2127 when programmer takes care to stack from being destroyed. */
2128 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2129 target_flags |= MASK_NO_RED_ZONE;
2132 /* Validate -mpreferred-stack-boundary= value, or provide default.
2133 The default of 128 bits is for Pentium III's SSE __m128. We can't
2134 change it because of optimize_size. Otherwise, we can't mix object
2135 files compiled with -Os and -On. */
2136 ix86_preferred_stack_boundary = 128;
2137 if (ix86_preferred_stack_boundary_string)
2139 i = atoi (ix86_preferred_stack_boundary_string);
2140 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2141 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2142 TARGET_64BIT ? 4 : 2);
2144 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2147 /* Accept -mx87regparm only if 80387 support is enabled. */
2148 if (TARGET_X87REGPARM
2150 error ("-mx87regparm used without 80387 enabled");
2152 /* Accept -msseregparm only if at least SSE support is enabled. */
2153 if (TARGET_SSEREGPARM
2155 error ("-msseregparm used without SSE enabled");
2157 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2159 if (ix86_fpmath_string != 0)
2161 if (! strcmp (ix86_fpmath_string, "387"))
2162 ix86_fpmath = FPMATH_387;
2163 else if (! strcmp (ix86_fpmath_string, "sse"))
2167 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2168 ix86_fpmath = FPMATH_387;
2171 ix86_fpmath = FPMATH_SSE;
2173 else if (! strcmp (ix86_fpmath_string, "387,sse")
2174 || ! strcmp (ix86_fpmath_string, "sse,387"))
2178 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2179 ix86_fpmath = FPMATH_387;
2181 else if (!TARGET_80387)
2183 warning (0, "387 instruction set disabled, using SSE arithmetics");
2184 ix86_fpmath = FPMATH_SSE;
2187 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2190 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2193 /* If the i387 is disabled, then do not return values in it. */
2195 target_flags &= ~MASK_FLOAT_RETURNS;
2197 if ((x86_accumulate_outgoing_args & TUNEMASK)
2198 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2200 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2202 /* ??? Unwind info is not correct around the CFG unless either a frame
2203 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2204 unwind info generation to be aware of the CFG and propagating states
2206 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2207 || flag_exceptions || flag_non_call_exceptions)
2208 && flag_omit_frame_pointer
2209 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2211 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2212 warning (0, "unwind tables currently require either a frame pointer "
2213 "or -maccumulate-outgoing-args for correctness");
2214 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2217 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2220 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2221 p = strchr (internal_label_prefix, 'X');
2222 internal_label_prefix_len = p - internal_label_prefix;
2226 /* When scheduling description is not available, disable scheduler pass
2227 so it won't slow down the compilation and make x87 code slower. */
2228 if (!TARGET_SCHEDULE)
2229 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2231 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2232 set_param_value ("simultaneous-prefetches",
2233 ix86_cost->simultaneous_prefetches);
2234 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2235 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2238 /* switch to the appropriate section for output of DECL.
2239 DECL is either a `VAR_DECL' node or a constant of some sort.
2240 RELOC indicates whether forming the initial value of DECL requires
2241 link-time relocations. */
2244 x86_64_elf_select_section (tree decl, int reloc,
2245 unsigned HOST_WIDE_INT align)
2247 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2248 && ix86_in_large_data_p (decl))
2250 const char *sname = NULL;
2251 unsigned int flags = SECTION_WRITE;
2252 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2257 case SECCAT_DATA_REL:
2258 sname = ".ldata.rel";
2260 case SECCAT_DATA_REL_LOCAL:
2261 sname = ".ldata.rel.local";
2263 case SECCAT_DATA_REL_RO:
2264 sname = ".ldata.rel.ro";
2266 case SECCAT_DATA_REL_RO_LOCAL:
2267 sname = ".ldata.rel.ro.local";
2271 flags |= SECTION_BSS;
2274 case SECCAT_RODATA_MERGE_STR:
2275 case SECCAT_RODATA_MERGE_STR_INIT:
2276 case SECCAT_RODATA_MERGE_CONST:
2280 case SECCAT_SRODATA:
2287 /* We don't split these for medium model. Place them into
2288 default sections and hope for best. */
2293 /* We might get called with string constants, but get_named_section
2294 doesn't like them as they are not DECLs. Also, we need to set
2295 flags in that case. */
2297 return get_section (sname, flags, NULL);
2298 return get_named_section (decl, sname, reloc);
2301 return default_elf_select_section (decl, reloc, align);
2304 /* Build up a unique section name, expressed as a
2305 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2306 RELOC indicates whether the initial value of EXP requires
2307 link-time relocations. */
2310 x86_64_elf_unique_section (tree decl, int reloc)
2312 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2313 && ix86_in_large_data_p (decl))
2315 const char *prefix = NULL;
2316 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2317 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2319 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2322 case SECCAT_DATA_REL:
2323 case SECCAT_DATA_REL_LOCAL:
2324 case SECCAT_DATA_REL_RO:
2325 case SECCAT_DATA_REL_RO_LOCAL:
2326 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2329 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2332 case SECCAT_RODATA_MERGE_STR:
2333 case SECCAT_RODATA_MERGE_STR_INIT:
2334 case SECCAT_RODATA_MERGE_CONST:
2335 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2337 case SECCAT_SRODATA:
2344 /* We don't split these for medium model. Place them into
2345 default sections and hope for best. */
2353 plen = strlen (prefix);
2355 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2356 name = targetm.strip_name_encoding (name);
2357 nlen = strlen (name);
2359 string = alloca (nlen + plen + 1);
2360 memcpy (string, prefix, plen);
2361 memcpy (string + plen, name, nlen + 1);
2363 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2367 default_unique_section (decl, reloc);
2370 #ifdef COMMON_ASM_OP
2371 /* This says how to output assembler code to declare an
2372 uninitialized external linkage data object.
2374 For medium model x86-64 we need to use .largecomm opcode for
2377 x86_elf_aligned_common (FILE *file,
2378 const char *name, unsigned HOST_WIDE_INT size,
2381 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2382 && size > (unsigned int)ix86_section_threshold)
2383 fprintf (file, ".largecomm\t");
2385 fprintf (file, "%s", COMMON_ASM_OP);
2386 assemble_name (file, name);
2387 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2388 size, align / BITS_PER_UNIT);
2391 /* Utility function for targets to use in implementing
2392 ASM_OUTPUT_ALIGNED_BSS. */
2395 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2396 const char *name, unsigned HOST_WIDE_INT size,
2399 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2400 && size > (unsigned int)ix86_section_threshold)
2401 switch_to_section (get_named_section (decl, ".lbss", 0));
2403 switch_to_section (bss_section);
2404 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2405 #ifdef ASM_DECLARE_OBJECT_NAME
2406 last_assemble_variable_decl = decl;
2407 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2409 /* Standard thing is just output label for the object. */
2410 ASM_OUTPUT_LABEL (file, name);
2411 #endif /* ASM_DECLARE_OBJECT_NAME */
2412 ASM_OUTPUT_SKIP (file, size ? size : 1);
2417 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2419 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2420 make the problem with not enough registers even worse. */
2421 #ifdef INSN_SCHEDULING
2423 flag_schedule_insns = 0;
2427 /* The Darwin libraries never set errno, so we might as well
2428 avoid calling them when that's the only reason we would. */
2429 flag_errno_math = 0;
2431 /* The default values of these switches depend on the TARGET_64BIT
2432 that is not known at this moment. Mark these values with 2 and
2433 let user the to override these. In case there is no command line option
2434 specifying them, we will set the defaults in override_options. */
2436 flag_omit_frame_pointer = 2;
2437 flag_pcc_struct_return = 2;
2438 flag_asynchronous_unwind_tables = 2;
2439 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2440 SUBTARGET_OPTIMIZATION_OPTIONS;
2444 /* Table of valid machine attributes. */
2445 const struct attribute_spec ix86_attribute_table[] =
2447 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2448 /* Stdcall attribute says callee is responsible for popping arguments
2449 if they are not variable. */
2450 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2451 /* Fastcall attribute says callee is responsible for popping arguments
2452 if they are not variable. */
2453 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2454 /* Cdecl attribute says the callee is a normal C declaration */
2455 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2456 /* Regparm attribute specifies how many integer arguments are to be
2457 passed in registers. */
2458 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2459 /* X87regparm attribute says we are passing floating point arguments
2460 in 80387 registers. */
2461 { "x87regparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2462 /* Sseregparm attribute says we are using x86_64 calling conventions
2463 for FP arguments. */
2464 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2465 /* force_align_arg_pointer says this function realigns the stack at entry. */
2466 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2467 false, true, true, ix86_handle_cconv_attribute },
2468 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2469 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2470 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2471 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2473 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2474 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2475 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2476 SUBTARGET_ATTRIBUTE_TABLE,
2478 { NULL, 0, 0, false, false, false, NULL }
2481 /* Decide whether we can make a sibling call to a function. DECL is the
2482 declaration of the function being targeted by the call and EXP is the
2483 CALL_EXPR representing the call. */
2486 ix86_function_ok_for_sibcall (tree decl, tree exp)
2491 /* If we are generating position-independent code, we cannot sibcall
2492 optimize any indirect call, or a direct call to a global function,
2493 as the PLT requires %ebx be live. */
2494 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2501 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2502 if (POINTER_TYPE_P (func))
2503 func = TREE_TYPE (func);
2506 /* Check that the return value locations are the same. Like
2507 if we are returning floats on the 80387 register stack, we cannot
2508 make a sibcall from a function that doesn't return a float to a
2509 function that does or, conversely, from a function that does return
2510 a float to a function that doesn't; the necessary stack adjustment
2511 would not be executed. This is also the place we notice
2512 differences in the return value ABI. Note that it is ok for one
2513 of the functions to have void return type as long as the return
2514 value of the other is passed in a register. */
2515 a = ix86_function_value (TREE_TYPE (exp), func, false);
2516 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2518 if (STACK_REG_P (a) || STACK_REG_P (b))
2520 if (!rtx_equal_p (a, b))
2523 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2525 else if (!rtx_equal_p (a, b))
2528 /* If this call is indirect, we'll need to be able to use a call-clobbered
2529 register for the address of the target function. Make sure that all
2530 such registers are not used for passing parameters. */
2531 if (!decl && !TARGET_64BIT)
2535 /* We're looking at the CALL_EXPR, we need the type of the function. */
2536 type = TREE_OPERAND (exp, 0); /* pointer expression */
2537 type = TREE_TYPE (type); /* pointer type */
2538 type = TREE_TYPE (type); /* function type */
2540 if (ix86_function_regparm (type, NULL) >= 3)
2542 /* ??? Need to count the actual number of registers to be used,
2543 not the possible number of registers. Fix later. */
2548 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2549 /* Dllimport'd functions are also called indirectly. */
2550 if (decl && DECL_DLLIMPORT_P (decl)
2551 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2555 /* If we forced aligned the stack, then sibcalling would unalign the
2556 stack, which may break the called function. */
2557 if (cfun->machine->force_align_arg_pointer)
2560 /* Otherwise okay. That also includes certain types of indirect calls. */
2564 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "x87regparm"
2565 and "sseregparm" calling convention attributes;
2566 arguments as in struct attribute_spec.handler. */
2569 ix86_handle_cconv_attribute (tree *node, tree name,
2571 int flags ATTRIBUTE_UNUSED,
2574 if (TREE_CODE (*node) != FUNCTION_TYPE
2575 && TREE_CODE (*node) != METHOD_TYPE
2576 && TREE_CODE (*node) != FIELD_DECL
2577 && TREE_CODE (*node) != TYPE_DECL)
2579 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2580 IDENTIFIER_POINTER (name));
2581 *no_add_attrs = true;
2585 /* Can combine regparm with all attributes but fastcall. */
2586 if (is_attribute_p ("regparm", name))
2590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2592 error ("fastcall and regparm attributes are not compatible");
2595 cst = TREE_VALUE (args);
2596 if (TREE_CODE (cst) != INTEGER_CST)
2598 warning (OPT_Wattributes,
2599 "%qs attribute requires an integer constant argument",
2600 IDENTIFIER_POINTER (name));
2601 *no_add_attrs = true;
2603 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2605 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2606 IDENTIFIER_POINTER (name), REGPARM_MAX);
2607 *no_add_attrs = true;
2611 && lookup_attribute (ix86_force_align_arg_pointer_string,
2612 TYPE_ATTRIBUTES (*node))
2613 && compare_tree_int (cst, REGPARM_MAX-1))
2615 error ("%s functions limited to %d register parameters",
2616 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2624 warning (OPT_Wattributes, "%qs attribute ignored",
2625 IDENTIFIER_POINTER (name));
2626 *no_add_attrs = true;
2630 /* Can combine fastcall with stdcall (redundant), x87regparm
2632 if (is_attribute_p ("fastcall", name))
2634 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2636 error ("fastcall and cdecl attributes are not compatible");
2638 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2640 error ("fastcall and stdcall attributes are not compatible");
2642 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2644 error ("fastcall and regparm attributes are not compatible");
2648 /* Can combine stdcall with fastcall (redundant), regparm,
2649 x87regparm and sseregparm. */
2650 else if (is_attribute_p ("stdcall", name))
2652 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2654 error ("stdcall and cdecl attributes are not compatible");
2656 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2658 error ("stdcall and fastcall attributes are not compatible");
2662 /* Can combine cdecl with regparm, x87regparm and sseregparm. */
2663 else if (is_attribute_p ("cdecl", name))
2665 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2667 error ("stdcall and cdecl attributes are not compatible");
2669 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2671 error ("fastcall and cdecl attributes are not compatible");
2675 /* Can combine x87regparm or sseregparm with all attributes. */
2680 /* Return 0 if the attributes for two types are incompatible, 1 if they
2681 are compatible, and 2 if they are nearly compatible (which causes a
2682 warning to be generated). */
2685 ix86_comp_type_attributes (tree type1, tree type2)
2687 /* Check for mismatch of non-default calling convention. */
2688 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2690 if (TREE_CODE (type1) != FUNCTION_TYPE)
2693 /* Check for mismatched fastcall/regparm types. */
2694 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2695 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2696 || (ix86_function_regparm (type1, NULL)
2697 != ix86_function_regparm (type2, NULL)))
2700 /* Check for mismatched x87regparm types. */
2701 if (!lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type1))
2702 != !lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type2)))
2705 /* Check for mismatched sseregparm types. */
2706 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2707 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2710 /* Check for mismatched return types (cdecl vs stdcall). */
2711 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2712 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2718 /* Return the regparm value for a function with the indicated TYPE and DECL.
2719 DECL may be NULL when calling function indirectly
2720 or considering a libcall. */
2723 ix86_function_regparm (tree type, tree decl)
2726 int regparm = ix86_regparm;
2727 bool user_convention = false;
2731 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2734 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2735 user_convention = true;
2738 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2741 user_convention = true;
2744 /* Use register calling convention for local functions when possible. */
2745 if (!TARGET_64BIT && !user_convention && decl
2746 && flag_unit_at_a_time && !profile_flag)
2748 struct cgraph_local_info *i = cgraph_local_info (decl);
2751 int local_regparm, globals = 0, regno;
2753 /* Make sure no regparm register is taken by a global register
2755 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2756 if (global_regs[local_regparm])
2758 /* We can't use regparm(3) for nested functions as these use
2759 static chain pointer in third argument. */
2760 if (local_regparm == 3
2761 && decl_function_context (decl)
2762 && !DECL_NO_STATIC_CHAIN (decl))
2764 /* If the function realigns its stackpointer, the
2765 prologue will clobber %ecx. If we've already
2766 generated code for the callee, the callee
2767 DECL_STRUCT_FUNCTION is gone, so we fall back to
2768 scanning the attributes for the self-realigning
2770 if ((DECL_STRUCT_FUNCTION (decl)
2771 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2772 || (!DECL_STRUCT_FUNCTION (decl)
2773 && lookup_attribute (ix86_force_align_arg_pointer_string,
2774 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2776 /* Each global register variable increases register preassure,
2777 so the more global reg vars there are, the smaller regparm
2778 optimization use, unless requested by the user explicitly. */
2779 for (regno = 0; regno < 6; regno++)
2780 if (global_regs[regno])
2783 = globals < local_regparm ? local_regparm - globals : 0;
2785 if (local_regparm > regparm)
2786 regparm = local_regparm;
2793 /* Return 1 if we can pass up to X87_REGPARM_MAX floating point
2794 arguments in x87 registers for a function with the indicated
2795 TYPE and DECL. DECL may be NULL when calling function indirectly
2796 or considering a libcall. For local functions, return 2.
2797 Otherwise return 0. */
2800 ix86_function_x87regparm (tree type, tree decl)
2802 /* Use x87 registers to pass floating point arguments if requested
2803 by the x87regparm attribute. */
2804 if (TARGET_X87REGPARM
2806 && lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type))))
2811 error ("Calling %qD with attribute x87regparm without "
2812 "80387 enabled", decl);
2814 error ("Calling %qT with attribute x87regparm without "
2815 "80387 enabled", type);
2822 /* For local functions, pass up to X87_REGPARM_MAX floating point
2823 arguments in x87 registers. */
2824 if (!TARGET_64BIT && decl
2825 && flag_unit_at_a_time && !profile_flag)
2827 struct cgraph_local_info *i = cgraph_local_info (decl);
2835 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2836 DFmode (2) arguments in SSE registers for a function with the
2837 indicated TYPE and DECL. DECL may be NULL when calling function
2838 indirectly or considering a libcall. Otherwise return 0. */
2841 ix86_function_sseregparm (tree type, tree decl)
2843 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2844 by the sseregparm attribute. */
2845 if (TARGET_SSEREGPARM
2847 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2852 error ("Calling %qD with attribute sseregparm without "
2853 "SSE/SSE2 enabled", decl);
2855 error ("Calling %qT with attribute sseregparm without "
2856 "SSE/SSE2 enabled", type);
2863 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2864 (and DFmode for SSE2) arguments in SSE registers,
2865 even for 32-bit targets. */
2866 if (!TARGET_64BIT && decl
2867 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2869 struct cgraph_local_info *i = cgraph_local_info (decl);
2871 return TARGET_SSE2 ? 2 : 1;
2877 /* Return true if EAX is live at the start of the function. Used by
2878 ix86_expand_prologue to determine if we need special help before
2879 calling allocate_stack_worker. */
2882 ix86_eax_live_at_start_p (void)
2884 /* Cheat. Don't bother working forward from ix86_function_regparm
2885 to the function type to whether an actual argument is located in
2886 eax. Instead just look at cfg info, which is still close enough
2887 to correct at this point. This gives false positives for broken
2888 functions that might use uninitialized data that happens to be
2889 allocated in eax, but who cares? */
2890 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2893 /* Value is the number of bytes of arguments automatically
2894 popped when returning from a subroutine call.
2895 FUNDECL is the declaration node of the function (as a tree),
2896 FUNTYPE is the data type of the function (as a tree),
2897 or for a library call it is an identifier node for the subroutine name.
2898 SIZE is the number of bytes of arguments passed on the stack.
2900 On the 80386, the RTD insn may be used to pop them if the number
2901 of args is fixed, but if the number is variable then the caller
2902 must pop them all. RTD can't be used for library calls now
2903 because the library is compiled with the Unix compiler.
2904 Use of RTD is a selectable option, since it is incompatible with
2905 standard Unix calling sequences. If the option is not selected,
2906 the caller must always pop the args.
2908 The attribute stdcall is equivalent to RTD on a per module basis. */
2911 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2913 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2915 /* Cdecl functions override -mrtd, and never pop the stack. */
2916 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2918 /* Stdcall and fastcall functions will pop the stack if not
2920 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2921 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2925 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2926 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2927 == void_type_node)))
2931 /* Lose any fake structure return argument if it is passed on the stack. */
2932 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2934 && !KEEP_AGGREGATE_RETURN_POINTER)
2936 int nregs = ix86_function_regparm (funtype, fundecl);
2939 return GET_MODE_SIZE (Pmode);
2945 /* Argument support functions. */
2947 /* Return true when register may be used to pass function parameters. */
2949 ix86_function_arg_regno_p (int regno)
2953 return (regno < REGPARM_MAX
2954 || (TARGET_80387 && FP_REGNO_P (regno)
2955 && (regno < FIRST_FLOAT_REG + X87_REGPARM_MAX))
2956 || (TARGET_MMX && MMX_REGNO_P (regno)
2957 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2958 || (TARGET_SSE && SSE_REGNO_P (regno)
2959 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2961 if (TARGET_SSE && SSE_REGNO_P (regno)
2962 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2964 /* RAX is used as hidden argument to va_arg functions. */
2967 for (i = 0; i < REGPARM_MAX; i++)
2968 if (regno == x86_64_int_parameter_registers[i])
2973 /* Return if we do not know how to pass TYPE solely in registers. */
2976 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2978 if (must_pass_in_stack_var_size_or_pad (mode, type))
2981 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2982 The layout_type routine is crafty and tries to trick us into passing
2983 currently unsupported vector types on the stack by using TImode. */
2984 return (!TARGET_64BIT && mode == TImode
2985 && type && TREE_CODE (type) != VECTOR_TYPE);
2988 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2989 for a call to a function whose data type is FNTYPE.
2990 For a library call, FNTYPE is 0. */
2993 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2994 tree fntype, /* tree ptr for function decl */
2995 rtx libname, /* SYMBOL_REF of library name or 0 */
2998 static CUMULATIVE_ARGS zero_cum;
2999 tree param, next_param;
3001 if (TARGET_DEBUG_ARG)
3003 fprintf (stderr, "\ninit_cumulative_args (");
3005 fprintf (stderr, "fntype code = %s, ret code = %s",
3006 tree_code_name[(int) TREE_CODE (fntype)],
3007 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3009 fprintf (stderr, "no fntype");
3012 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3017 /* Set up the number of registers to use for passing arguments. */
3018 cum->nregs = ix86_regparm;
3020 cum->x87_nregs = X87_REGPARM_MAX;
3022 cum->sse_nregs = SSE_REGPARM_MAX;
3024 cum->mmx_nregs = MMX_REGPARM_MAX;
3025 cum->warn_sse = true;
3026 cum->warn_mmx = true;
3027 cum->maybe_vaarg = false;
3029 /* Use ecx and edx registers if function has fastcall attribute,
3030 else look for regparm information. */
3031 if (fntype && !TARGET_64BIT)
3033 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3039 cum->nregs = ix86_function_regparm (fntype, fndecl);
3042 /* Set up the number of 80387 registers used for passing
3043 floating point arguments. Warn for mismatching ABI. */
3044 cum->float_in_x87 = ix86_function_x87regparm (fntype, fndecl);
3046 /* Set up the number of SSE registers used for passing SFmode
3047 and DFmode arguments. Warn for mismatching ABI. */
3048 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3050 /* Determine if this function has variable arguments. This is
3051 indicated by the last argument being 'void_type_mode' if there
3052 are no variable arguments. If there are variable arguments, then
3053 we won't pass anything in registers in 32-bit mode. */
3055 if (cum->nregs || cum->mmx_nregs
3056 || cum->x87_nregs || cum->sse_nregs)
3058 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3059 param != 0; param = next_param)
3061 next_param = TREE_CHAIN (param);
3062 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3073 cum->float_in_x87 = 0;
3074 cum->float_in_sse = 0;
3076 cum->maybe_vaarg = true;
3080 if ((!fntype && !libname)
3081 || (fntype && !TYPE_ARG_TYPES (fntype)))
3082 cum->maybe_vaarg = true;
3084 if (TARGET_DEBUG_ARG)
3085 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3090 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3091 But in the case of vector types, it is some vector mode.
3093 When we have only some of our vector isa extensions enabled, then there
3094 are some modes for which vector_mode_supported_p is false. For these
3095 modes, the generic vector support in gcc will choose some non-vector mode
3096 in order to implement the type. By computing the natural mode, we'll
3097 select the proper ABI location for the operand and not depend on whatever
3098 the middle-end decides to do with these vector types. */
3100 static enum machine_mode
3101 type_natural_mode (tree type)
3103 enum machine_mode mode = TYPE_MODE (type);
3105 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3107 HOST_WIDE_INT size = int_size_in_bytes (type);
3108 if ((size == 8 || size == 16)
3109 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3110 && TYPE_VECTOR_SUBPARTS (type) > 1)
3112 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3114 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3115 mode = MIN_MODE_VECTOR_FLOAT;
3117 mode = MIN_MODE_VECTOR_INT;
3119 /* Get the mode which has this inner mode and number of units. */
3120 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3121 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3122 && GET_MODE_INNER (mode) == innermode)
3132 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3133 this may not agree with the mode that the type system has chosen for the
3134 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3135 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3138 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3143 if (orig_mode != BLKmode)
3144 tmp = gen_rtx_REG (orig_mode, regno);
3147 tmp = gen_rtx_REG (mode, regno);
3148 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3149 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3155 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3156 of this code is to classify each 8bytes of incoming argument by the register
3157 class and assign registers accordingly. */
3159 /* Return the union class of CLASS1 and CLASS2.
3160 See the x86-64 PS ABI for details. */
3162 static enum x86_64_reg_class
3163 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3165 /* Rule #1: If both classes are equal, this is the resulting class. */
3166 if (class1 == class2)
3169 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3171 if (class1 == X86_64_NO_CLASS)
3173 if (class2 == X86_64_NO_CLASS)
3176 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3177 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3178 return X86_64_MEMORY_CLASS;
3180 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3181 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3182 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3183 return X86_64_INTEGERSI_CLASS;
3184 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3185 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3186 return X86_64_INTEGER_CLASS;
3188 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3190 if (class1 == X86_64_X87_CLASS
3191 || class1 == X86_64_X87UP_CLASS
3192 || class1 == X86_64_COMPLEX_X87_CLASS
3193 || class2 == X86_64_X87_CLASS
3194 || class2 == X86_64_X87UP_CLASS
3195 || class2 == X86_64_COMPLEX_X87_CLASS)
3196 return X86_64_MEMORY_CLASS;
3198 /* Rule #6: Otherwise class SSE is used. */
3199 return X86_64_SSE_CLASS;
3202 /* Classify the argument of type TYPE and mode MODE.
3203 CLASSES will be filled by the register class used to pass each word
3204 of the operand. The number of words is returned. In case the parameter
3205 should be passed in memory, 0 is returned. As a special case for zero
3206 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3208 BIT_OFFSET is used internally for handling records and specifies offset
3209 of the offset in bits modulo 256 to avoid overflow cases.
3211 See the x86-64 PS ABI for details.
3215 classify_argument (enum machine_mode mode, tree type,
3216 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3218 HOST_WIDE_INT bytes =
3219 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3220 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3222 /* Variable sized entities are always passed/returned in memory. */
3226 if (mode != VOIDmode
3227 && targetm.calls.must_pass_in_stack (mode, type))
3230 if (type && AGGREGATE_TYPE_P (type))
3234 enum x86_64_reg_class subclasses[MAX_CLASSES];
3236 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3240 for (i = 0; i < words; i++)
3241 classes[i] = X86_64_NO_CLASS;
3243 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3244 signalize memory class, so handle it as special case. */
3247 classes[0] = X86_64_NO_CLASS;
3251 /* Classify each field of record and merge classes. */
3252 switch (TREE_CODE (type))
3255 /* And now merge the fields of structure. */
3256 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3258 if (TREE_CODE (field) == FIELD_DECL)
3262 if (TREE_TYPE (field) == error_mark_node)
3265 /* Bitfields are always classified as integer. Handle them
3266 early, since later code would consider them to be
3267 misaligned integers. */
3268 if (DECL_BIT_FIELD (field))
3270 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3271 i < ((int_bit_position (field) + (bit_offset % 64))
3272 + tree_low_cst (DECL_SIZE (field), 0)
3275 merge_classes (X86_64_INTEGER_CLASS,
3280 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3281 TREE_TYPE (field), subclasses,
3282 (int_bit_position (field)
3283 + bit_offset) % 256);
3286 for (i = 0; i < num; i++)
3289 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3291 merge_classes (subclasses[i], classes[i + pos]);
3299 /* Arrays are handled as small records. */
3302 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3303 TREE_TYPE (type), subclasses, bit_offset);
3307 /* The partial classes are now full classes. */
3308 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3309 subclasses[0] = X86_64_SSE_CLASS;
3310 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3311 subclasses[0] = X86_64_INTEGER_CLASS;
3313 for (i = 0; i < words; i++)
3314 classes[i] = subclasses[i % num];
3319 case QUAL_UNION_TYPE:
3320 /* Unions are similar to RECORD_TYPE but offset is always 0.
3322 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3324 if (TREE_CODE (field) == FIELD_DECL)
3328 if (TREE_TYPE (field) == error_mark_node)
3331 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3332 TREE_TYPE (field), subclasses,
3336 for (i = 0; i < num; i++)
3337 classes[i] = merge_classes (subclasses[i], classes[i]);
3346 /* Final merger cleanup. */
3347 for (i = 0; i < words; i++)
3349 /* If one class is MEMORY, everything should be passed in
3351 if (classes[i] == X86_64_MEMORY_CLASS)
3354 /* The X86_64_SSEUP_CLASS should be always preceded by
3355 X86_64_SSE_CLASS. */
3356 if (classes[i] == X86_64_SSEUP_CLASS
3357 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3358 classes[i] = X86_64_SSE_CLASS;
3360 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3361 if (classes[i] == X86_64_X87UP_CLASS
3362 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3363 classes[i] = X86_64_SSE_CLASS;
3368 /* Compute alignment needed. We align all types to natural boundaries with
3369 exception of XFmode that is aligned to 64bits. */
3370 if (mode != VOIDmode && mode != BLKmode)
3372 int mode_alignment = GET_MODE_BITSIZE (mode);
3375 mode_alignment = 128;
3376 else if (mode == XCmode)
3377 mode_alignment = 256;
3378 if (COMPLEX_MODE_P (mode))
3379 mode_alignment /= 2;
3380 /* Misaligned fields are always returned in memory. */
3381 if (bit_offset % mode_alignment)
3385 /* for V1xx modes, just use the base mode */
3386 if (VECTOR_MODE_P (mode)
3387 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3388 mode = GET_MODE_INNER (mode);
3390 /* Classification of atomic types. */
3395 classes[0] = X86_64_SSE_CLASS;
3398 classes[0] = X86_64_SSE_CLASS;
3399 classes[1] = X86_64_SSEUP_CLASS;
3408 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3409 classes[0] = X86_64_INTEGERSI_CLASS;
3411 classes[0] = X86_64_INTEGER_CLASS;
3415 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3420 if (!(bit_offset % 64))
3421 classes[0] = X86_64_SSESF_CLASS;
3423 classes[0] = X86_64_SSE_CLASS;
3426 classes[0] = X86_64_SSEDF_CLASS;
3429 classes[0] = X86_64_X87_CLASS;
3430 classes[1] = X86_64_X87UP_CLASS;
3433 classes[0] = X86_64_SSE_CLASS;
3434 classes[1] = X86_64_SSEUP_CLASS;
3437 classes[0] = X86_64_SSE_CLASS;
3440 classes[0] = X86_64_SSEDF_CLASS;
3441 classes[1] = X86_64_SSEDF_CLASS;
3444 classes[0] = X86_64_COMPLEX_X87_CLASS;
3447 /* This modes is larger than 16 bytes. */
3455 classes[0] = X86_64_SSE_CLASS;
3456 classes[1] = X86_64_SSEUP_CLASS;
3462 classes[0] = X86_64_SSE_CLASS;
3468 gcc_assert (VECTOR_MODE_P (mode));
3473 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3475 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3476 classes[0] = X86_64_INTEGERSI_CLASS;
3478 classes[0] = X86_64_INTEGER_CLASS;
3479 classes[1] = X86_64_INTEGER_CLASS;
3480 return 1 + (bytes > 8);
3484 /* Examine the argument and return set number of register required in each
3485 class. Return 0 iff parameter should be passed in memory. */
3487 examine_argument (enum machine_mode mode, tree type, int in_return,
3488 int *int_nregs, int *sse_nregs)
3490 enum x86_64_reg_class class[MAX_CLASSES];
3491 int n = classify_argument (mode, type, class, 0);
3497 for (n--; n >= 0; n--)
3500 case X86_64_INTEGER_CLASS:
3501 case X86_64_INTEGERSI_CLASS:
3504 case X86_64_SSE_CLASS:
3505 case X86_64_SSESF_CLASS:
3506 case X86_64_SSEDF_CLASS:
3509 case X86_64_NO_CLASS:
3510 case X86_64_SSEUP_CLASS:
3512 case X86_64_X87_CLASS:
3513 case X86_64_X87UP_CLASS:
3517 case X86_64_COMPLEX_X87_CLASS:
3518 return in_return ? 2 : 0;
3519 case X86_64_MEMORY_CLASS:
3525 /* Construct container for the argument used by GCC interface. See
3526 FUNCTION_ARG for the detailed description. */
3529 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3530 tree type, int in_return, int nintregs, int nsseregs,
3531 const int *intreg, int sse_regno)
3533 /* The following variables hold the static issued_error state. */
3534 static bool issued_sse_arg_error;
3535 static bool issued_sse_ret_error;
3536 static bool issued_x87_ret_error;
3538 enum machine_mode tmpmode;
3540 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3541 enum x86_64_reg_class class[MAX_CLASSES];
3545 int needed_sseregs, needed_intregs;
3546 rtx exp[MAX_CLASSES];
3549 n = classify_argument (mode, type, class, 0);
3550 if (TARGET_DEBUG_ARG)
3553 fprintf (stderr, "Memory class\n");
3556 fprintf (stderr, "Classes:");
3557 for (i = 0; i < n; i++)
3559 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3561 fprintf (stderr, "\n");
3566 if (!examine_argument (mode, type, in_return, &needed_intregs,
3569 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3572 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3573 some less clueful developer tries to use floating-point anyway. */
3574 if (needed_sseregs && !TARGET_SSE)
3578 if (!issued_sse_ret_error)
3580 error ("SSE register return with SSE disabled");
3581 issued_sse_ret_error = true;
3584 else if (!issued_sse_arg_error)
3586 error ("SSE register argument with SSE disabled");
3587 issued_sse_arg_error = true;
3592 /* Likewise, error if the ABI requires us to return values in the
3593 x87 registers and the user specified -mno-80387. */
3594 if (!TARGET_80387 && in_return)
3595 for (i = 0; i < n; i++)
3596 if (class[i] == X86_64_X87_CLASS
3597 || class[i] == X86_64_X87UP_CLASS
3598 || class[i] == X86_64_COMPLEX_X87_CLASS)
3600 if (!issued_x87_ret_error)
3602 error ("x87 register return with x87 disabled");
3603 issued_x87_ret_error = true;
3608 /* First construct simple cases. Avoid SCmode, since we want to use
3609 single register to pass this type. */
3610 if (n == 1 && mode != SCmode)
3613 case X86_64_INTEGER_CLASS:
3614 case X86_64_INTEGERSI_CLASS:
3615 return gen_rtx_REG (mode, intreg[0]);
3616 case X86_64_SSE_CLASS:
3617 case X86_64_SSESF_CLASS:
3618 case X86_64_SSEDF_CLASS:
3619 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3620 case X86_64_X87_CLASS:
3621 case X86_64_COMPLEX_X87_CLASS:
3622 return gen_rtx_REG (mode, FIRST_STACK_REG);
3623 case X86_64_NO_CLASS:
3624 /* Zero sized array, struct or class. */
3629 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3631 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3633 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3634 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3635 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3636 && class[1] == X86_64_INTEGER_CLASS
3637 && (mode == CDImode || mode == TImode || mode == TFmode)
3638 && intreg[0] + 1 == intreg[1])
3639 return gen_rtx_REG (mode, intreg[0]);
3641 /* Otherwise figure out the entries of the PARALLEL. */
3642 for (i = 0; i < n; i++)
3646 case X86_64_NO_CLASS:
3648 case X86_64_INTEGER_CLASS:
3649 case X86_64_INTEGERSI_CLASS:
3650 /* Merge TImodes on aligned occasions here too. */
3651 if (i * 8 + 8 > bytes)
3652 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3653 else if (class[i] == X86_64_INTEGERSI_CLASS)
3657 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3658 if (tmpmode == BLKmode)
3660 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3661 gen_rtx_REG (tmpmode, *intreg),
3665 case X86_64_SSESF_CLASS:
3666 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3667 gen_rtx_REG (SFmode,
3668 SSE_REGNO (sse_regno)),
3672 case X86_64_SSEDF_CLASS:
3673 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3674 gen_rtx_REG (DFmode,
3675 SSE_REGNO (sse_regno)),
3679 case X86_64_SSE_CLASS:
3680 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3684 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3685 gen_rtx_REG (tmpmode,
3686 SSE_REGNO (sse_regno)),
3688 if (tmpmode == TImode)
3697 /* Empty aligned struct, union or class. */
3701 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3702 for (i = 0; i < nexps; i++)
3703 XVECEXP (ret, 0, i) = exp [i];
3707 /* Update the data in CUM to advance over an argument
3708 of mode MODE and data type TYPE.
3709 (TYPE is null for libcalls where that information may not be available.) */
3712 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3713 tree type, int named)
3716 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3717 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3720 mode = type_natural_mode (type);
3722 if (TARGET_DEBUG_ARG)
3723 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3724 "mode=%s, named=%d)\n\n",
3725 words, cum->words, cum->nregs, cum->sse_nregs,
3726 GET_MODE_NAME (mode), named);
3730 int int_nregs, sse_nregs;
3731 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3732 cum->words += words;
3733 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3735 cum->nregs -= int_nregs;
3736 cum->sse_nregs -= sse_nregs;
3737 cum->regno += int_nregs;
3738 cum->sse_regno += sse_nregs;
3741 cum->words += words;
3759 cum->words += words;
3760 cum->nregs -= words;
3761 cum->regno += words;
3763 if (cum->nregs <= 0)
3771 if (cum->float_in_sse > 0)
3775 if (cum->float_in_sse > 1)
3778 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3779 rounding takes place when values are passed in x87
3780 registers, pass DFmode and SFmode types to local functions
3781 only when flag_unsafe_math_optimizations is set. */
3782 if (!cum->float_in_x87
3783 || (cum->float_in_x87 == 2
3784 && !flag_unsafe_math_optimizations))
3788 if (!cum->float_in_x87)
3791 if (!type || !AGGREGATE_TYPE_P (type))
3793 cum->x87_nregs -= 1;
3794 cum->x87_regno += 1;
3795 if (cum->x87_nregs <= 0)
3812 if (!type || !AGGREGATE_TYPE_P (type))
3814 cum->sse_nregs -= 1;
3815 cum->sse_regno += 1;
3816 if (cum->sse_nregs <= 0)
3828 if (!type || !AGGREGATE_TYPE_P (type))
3830 cum->mmx_nregs -= 1;
3831 cum->mmx_regno += 1;
3832 if (cum->mmx_nregs <= 0)
3843 /* Define where to put the arguments to a function.
3844 Value is zero to push the argument on the stack,
3845 or a hard register in which to store the argument.
3847 MODE is the argument's machine mode.
3848 TYPE is the data type of the argument (as a tree).
3849 This is null for libcalls where that information may
3851 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3852 the preceding args and about the function being called.
3853 NAMED is nonzero if this argument is a named parameter
3854 (otherwise it is an extra parameter matching an ellipsis). */
3857 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3858 tree type, int named)
3860 enum machine_mode mode = orig_mode;
3863 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3864 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3865 static bool warnedsse, warnedmmx;
3867 /* To simplify the code below, represent vector types with a vector mode
3868 even if MMX/SSE are not active. */
3869 if (type && TREE_CODE (type) == VECTOR_TYPE)
3870 mode = type_natural_mode (type);
3872 /* Handle a hidden AL argument containing number of registers for varargs
3873 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3875 if (mode == VOIDmode)
3878 return GEN_INT (cum->maybe_vaarg
3879 ? (cum->sse_nregs < 0
3887 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3889 &x86_64_int_parameter_registers [cum->regno],
3905 if (words <= cum->nregs)
3907 int regno = cum->regno;
3909 /* Fastcall allocates the first two DWORD (SImode) or
3910 smaller arguments to ECX and EDX. */
3913 if (mode == BLKmode || mode == DImode)
3916 /* ECX not EAX is the first allocated register. */
3920 ret = gen_rtx_REG (mode, regno);
3925 if (cum->float_in_sse > 0)
3929 if (cum->float_in_sse > 1)
3932 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3933 rounding takes place when values are passed in x87
3934 registers, pass DFmode and SFmode types to local functions
3935 only when flag_unsafe_math_optimizations is set. */
3936 if (!cum->float_in_x87
3937 || (cum->float_in_x87 == 2
3938 && !flag_unsafe_math_optimizations))
3942 if (!cum->float_in_x87)
3945 if (!type || !AGGREGATE_TYPE_P (type))
3947 ret = gen_rtx_REG (mode, cum->x87_regno + FIRST_FLOAT_REG);
3959 if (!type || !AGGREGATE_TYPE_P (type))
3961 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3964 warning (0, "SSE vector argument without SSE enabled "
3968 ret = gen_reg_or_parallel (mode, orig_mode,
3969 cum->sse_regno + FIRST_SSE_REG);
3976 if (!type || !AGGREGATE_TYPE_P (type))
3978 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3981 warning (0, "MMX vector argument without MMX enabled "
3985 ret = gen_reg_or_parallel (mode, orig_mode,
3986 cum->mmx_regno + FIRST_MMX_REG);
3991 if (TARGET_DEBUG_ARG)
3994 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3995 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3998 print_simple_rtl (stderr, ret);
4000 fprintf (stderr, ", stack");
4002 fprintf (stderr, " )\n");
4008 /* A C expression that indicates when an argument must be passed by
4009 reference. If nonzero for an argument, a copy of that argument is
4010 made in memory and a pointer to the argument is passed instead of
4011 the argument itself. The pointer is passed in whatever way is
4012 appropriate for passing a pointer to that type. */
4015 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4016 enum machine_mode mode ATTRIBUTE_UNUSED,
4017 tree type, bool named ATTRIBUTE_UNUSED)
4022 if (type && int_size_in_bytes (type) == -1)
4024 if (TARGET_DEBUG_ARG)
4025 fprintf (stderr, "function_arg_pass_by_reference\n");
4032 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4033 ABI. Only called if TARGET_SSE. */
4035 contains_128bit_aligned_vector_p (tree type)
4037 enum machine_mode mode = TYPE_MODE (type);
4038 if (SSE_REG_MODE_P (mode)
4039 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4041 if (TYPE_ALIGN (type) < 128)
4044 if (AGGREGATE_TYPE_P (type))
4046 /* Walk the aggregates recursively. */
4047 switch (TREE_CODE (type))
4051 case QUAL_UNION_TYPE:
4055 /* Walk all the structure fields. */
4056 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4058 if (TREE_CODE (field) == FIELD_DECL
4059 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4066 /* Just for use if some languages passes arrays by value. */
4067 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4078 /* Gives the alignment boundary, in bits, of an argument with the
4079 specified mode and type. */
4082 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4086 align = TYPE_ALIGN (type);
4088 align = GET_MODE_ALIGNMENT (mode);
4089 if (align < PARM_BOUNDARY)
4090 align = PARM_BOUNDARY;
4093 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4094 make an exception for SSE modes since these require 128bit
4097 The handling here differs from field_alignment. ICC aligns MMX
4098 arguments to 4 byte boundaries, while structure fields are aligned
4099 to 8 byte boundaries. */
4101 align = PARM_BOUNDARY;
4104 if (!SSE_REG_MODE_P (mode))
4105 align = PARM_BOUNDARY;
4109 if (!contains_128bit_aligned_vector_p (type))
4110 align = PARM_BOUNDARY;
4118 /* Return true if N is a possible register number of function value. */
4120 ix86_function_value_regno_p (int regno)
4123 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4124 || (regno == FIRST_SSE_REG && TARGET_SSE))
4128 && (regno == FIRST_MMX_REG && TARGET_MMX))
4134 /* Define how to find the value returned by a function.
4135 VALTYPE is the data type of the value (as a tree).
4136 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4137 otherwise, FUNC is 0. */
4139 ix86_function_value (tree valtype, tree fntype_or_decl,
4140 bool outgoing ATTRIBUTE_UNUSED)
4142 enum machine_mode natmode = type_natural_mode (valtype);
4146 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4147 1, REGPARM_MAX, SSE_REGPARM_MAX,
4148 x86_64_int_return_registers, 0);
4149 /* For zero sized structures, construct_container return NULL, but we
4150 need to keep rest of compiler happy by returning meaningful value. */
4152 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4157 tree fn = NULL_TREE, fntype;
4159 && DECL_P (fntype_or_decl))
4160 fn = fntype_or_decl;
4161 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4162 return gen_rtx_REG (TYPE_MODE (valtype),
4163 ix86_value_regno (natmode, fn, fntype));
4167 /* Return true iff type is returned in memory. */
4169 ix86_return_in_memory (tree type)
4171 int needed_intregs, needed_sseregs, size;
4172 enum machine_mode mode = type_natural_mode (type);
4175 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4177 if (mode == BLKmode)
4180 size = int_size_in_bytes (type);
4182 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4185 if (VECTOR_MODE_P (mode) || mode == TImode)
4187 /* User-created vectors small enough to fit in EAX. */
4191 /* MMX/3dNow values are returned in MM0,
4192 except when it doesn't exits. */
4194 return (TARGET_MMX ? 0 : 1);
4196 /* SSE values are returned in XMM0, except when it doesn't exist. */
4198 return (TARGET_SSE ? 0 : 1);
4212 /* When returning SSE vector types, we have a choice of either
4213 (1) being abi incompatible with a -march switch, or
4214 (2) generating an error.
4215 Given no good solution, I think the safest thing is one warning.
4216 The user won't be able to use -Werror, but....
4218 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4219 called in response to actually generating a caller or callee that
4220 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4221 via aggregate_value_p for general type probing from tree-ssa. */
4224 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4226 static bool warnedsse, warnedmmx;
4230 /* Look at the return type of the function, not the function type. */
4231 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4233 if (!TARGET_SSE && !warnedsse)
4236 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4239 warning (0, "SSE vector return without SSE enabled "
4244 if (!TARGET_MMX && !warnedmmx)
4246 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4249 warning (0, "MMX vector return without MMX enabled "
4258 /* Define how to find the value returned by a library function
4259 assuming the value has mode MODE. */
4261 ix86_libcall_value (enum machine_mode mode)
4275 return gen_rtx_REG (mode, FIRST_SSE_REG);
4278 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4282 return gen_rtx_REG (mode, 0);
4286 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4289 /* Given a mode, return the register to use for a return value. */
4292 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4294 gcc_assert (!TARGET_64BIT);
4296 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4297 we normally prevent this case when mmx is not available. However
4298 some ABIs may require the result to be returned like DImode. */
4299 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4300 return TARGET_MMX ? FIRST_MMX_REG : 0;
4302 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4303 we prevent this case when sse is not available. However some ABIs
4304 may require the result to be returned like integer TImode. */
4305 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4306 return TARGET_SSE ? FIRST_SSE_REG : 0;
4308 /* Decimal floating point values can go in %eax, unlike other float modes. */
4309 if (DECIMAL_FLOAT_MODE_P (mode))
4312 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4313 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4316 /* Floating point return values in %st(0), except for local functions when
4317 SSE math is enabled or for functions with sseregparm attribute. */
4318 if ((func || fntype)
4319 && (mode == SFmode || mode == DFmode))
4321 int sse_level = ix86_function_sseregparm (fntype, func);
4322 if ((sse_level >= 1 && mode == SFmode)
4323 || (sse_level == 2 && mode == DFmode))
4324 return FIRST_SSE_REG;
4327 return FIRST_FLOAT_REG;
4330 /* Create the va_list data type. */
4333 ix86_build_builtin_va_list (void)
4335 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4337 /* For i386 we use plain pointer to argument area. */
4339 return build_pointer_type (char_type_node);
4341 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4342 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4344 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4345 unsigned_type_node);
4346 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4347 unsigned_type_node);
4348 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4350 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4353 va_list_gpr_counter_field = f_gpr;
4354 va_list_fpr_counter_field = f_fpr;
4356 DECL_FIELD_CONTEXT (f_gpr) = record;
4357 DECL_FIELD_CONTEXT (f_fpr) = record;
4358 DECL_FIELD_CONTEXT (f_ovf) = record;
4359 DECL_FIELD_CONTEXT (f_sav) = record;
4361 TREE_CHAIN (record) = type_decl;
4362 TYPE_NAME (record) = type_decl;
4363 TYPE_FIELDS (record) = f_gpr;
4364 TREE_CHAIN (f_gpr) = f_fpr;
4365 TREE_CHAIN (f_fpr) = f_ovf;
4366 TREE_CHAIN (f_ovf) = f_sav;
4368 layout_type (record);
4370 /* The correct type is an array type of one element. */
4371 return build_array_type (record, build_index_type (size_zero_node));
4374 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4377 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4378 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4381 CUMULATIVE_ARGS next_cum;
4382 rtx save_area = NULL_RTX, mem;
4395 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4398 /* Indicate to allocate space on the stack for varargs save area. */
4399 ix86_save_varrargs_registers = 1;
4401 cfun->stack_alignment_needed = 128;
4403 fntype = TREE_TYPE (current_function_decl);
4404 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4405 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4406 != void_type_node));
4408 /* For varargs, we do not want to skip the dummy va_dcl argument.
4409 For stdargs, we do want to skip the last named argument. */
4412 function_arg_advance (&next_cum, mode, type, 1);
4415 save_area = frame_pointer_rtx;
4417 set = get_varargs_alias_set ();
4419 for (i = next_cum.regno;
4421 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4424 mem = gen_rtx_MEM (Pmode,
4425 plus_constant (save_area, i * UNITS_PER_WORD));
4426 MEM_NOTRAP_P (mem) = 1;
4427 set_mem_alias_set (mem, set);
4428 emit_move_insn (mem, gen_rtx_REG (Pmode,
4429 x86_64_int_parameter_registers[i]));
4432 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4434 /* Now emit code to save SSE registers. The AX parameter contains number
4435 of SSE parameter registers used to call this function. We use
4436 sse_prologue_save insn template that produces computed jump across
4437 SSE saves. We need some preparation work to get this working. */
4439 label = gen_label_rtx ();
4440 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4442 /* Compute address to jump to :
4443 label - 5*eax + nnamed_sse_arguments*5 */
4444 tmp_reg = gen_reg_rtx (Pmode);
4445 nsse_reg = gen_reg_rtx (Pmode);
4446 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4447 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4448 gen_rtx_MULT (Pmode, nsse_reg,
4450 if (next_cum.sse_regno)
4453 gen_rtx_CONST (DImode,
4454 gen_rtx_PLUS (DImode,
4456 GEN_INT (next_cum.sse_regno * 4))));
4458 emit_move_insn (nsse_reg, label_ref);
4459 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4461 /* Compute address of memory block we save into. We always use pointer
4462 pointing 127 bytes after first byte to store - this is needed to keep
4463 instruction size limited by 4 bytes. */
4464 tmp_reg = gen_reg_rtx (Pmode);
4465 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4466 plus_constant (save_area,
4467 8 * REGPARM_MAX + 127)));
4468 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4469 MEM_NOTRAP_P (mem) = 1;
4470 set_mem_alias_set (mem, set);
4471 set_mem_align (mem, BITS_PER_WORD);
4473 /* And finally do the dirty job! */
4474 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4475 GEN_INT (next_cum.sse_regno), label));
4480 /* Implement va_start. */
4483 ix86_va_start (tree valist, rtx nextarg)
4485 HOST_WIDE_INT words, n_gpr, n_fpr;
4486 tree f_gpr, f_fpr, f_ovf, f_sav;
4487 tree gpr, fpr, ovf, sav, t;
4490 /* Only 64bit target needs something special. */
4493 std_expand_builtin_va_start (valist, nextarg);
4497 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4498 f_fpr = TREE_CHAIN (f_gpr);
4499 f_ovf = TREE_CHAIN (f_fpr);
4500 f_sav = TREE_CHAIN (f_ovf);
4502 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4503 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4504 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4505 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4506 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4508 /* Count number of gp and fp argument registers used. */
4509 words = current_function_args_info.words;
4510 n_gpr = current_function_args_info.regno;
4511 n_fpr = current_function_args_info.sse_regno;
4513 if (TARGET_DEBUG_ARG)
4514 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4515 (int) words, (int) n_gpr, (int) n_fpr);
4517 if (cfun->va_list_gpr_size)
4519 type = TREE_TYPE (gpr);
4520 t = build2 (MODIFY_EXPR, type, gpr,
4521 build_int_cst (type, n_gpr * 8));
4522 TREE_SIDE_EFFECTS (t) = 1;
4523 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4526 if (cfun->va_list_fpr_size)
4528 type = TREE_TYPE (fpr);
4529 t = build2 (MODIFY_EXPR, type, fpr,
4530 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4531 TREE_SIDE_EFFECTS (t) = 1;
4532 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4535 /* Find the overflow area. */
4536 type = TREE_TYPE (ovf);
4537 t = make_tree (type, virtual_incoming_args_rtx);
4539 t = build2 (PLUS_EXPR, type, t,
4540 build_int_cst (type, words * UNITS_PER_WORD));
4541 t = build2 (MODIFY_EXPR, type, ovf, t);
4542 TREE_SIDE_EFFECTS (t) = 1;
4543 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4545 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4547 /* Find the register save area.
4548 Prologue of the function save it right above stack frame. */
4549 type = TREE_TYPE (sav);
4550 t = make_tree (type, frame_pointer_rtx);
4551 t = build2 (MODIFY_EXPR, type, sav, t);
4552 TREE_SIDE_EFFECTS (t) = 1;
4553 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4557 /* Implement va_arg. */
4560 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4562 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4563 tree f_gpr, f_fpr, f_ovf, f_sav;
4564 tree gpr, fpr, ovf, sav, t;
4566 tree lab_false, lab_over = NULL_TREE;
4571 enum machine_mode nat_mode;
4573 /* Only 64bit target needs something special. */
4575 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4577 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4578 f_fpr = TREE_CHAIN (f_gpr);
4579 f_ovf = TREE_CHAIN (f_fpr);
4580 f_sav = TREE_CHAIN (f_ovf);
4582 valist = build_va_arg_indirect_ref (valist);
4583 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4584 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4585 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4586 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4588 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4590 type = build_pointer_type (type);
4591 size = int_size_in_bytes (type);
4592 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4594 nat_mode = type_natural_mode (type);
4595 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4596 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4598 /* Pull the value out of the saved registers. */
4600 addr = create_tmp_var (ptr_type_node, "addr");
4601 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4605 int needed_intregs, needed_sseregs;
4607 tree int_addr, sse_addr;
4609 lab_false = create_artificial_label ();
4610 lab_over = create_artificial_label ();
4612 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4614 need_temp = (!REG_P (container)
4615 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4616 || TYPE_ALIGN (type) > 128));
4618 /* In case we are passing structure, verify that it is consecutive block
4619 on the register save area. If not we need to do moves. */
4620 if (!need_temp && !REG_P (container))
4622 /* Verify that all registers are strictly consecutive */
4623 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4627 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4629 rtx slot = XVECEXP (container, 0, i);
4630 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4631 || INTVAL (XEXP (slot, 1)) != i * 16)
4639 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4641 rtx slot = XVECEXP (container, 0, i);
4642 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4643 || INTVAL (XEXP (slot, 1)) != i * 8)
4655 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4656 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4657 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4658 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4661 /* First ensure that we fit completely in registers. */
4664 t = build_int_cst (TREE_TYPE (gpr),
4665 (REGPARM_MAX - needed_intregs + 1) * 8);
4666 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4667 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4668 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4669 gimplify_and_add (t, pre_p);
4673 t = build_int_cst (TREE_TYPE (fpr),
4674 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4676 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4677 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4678 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4679 gimplify_and_add (t, pre_p);
4682 /* Compute index to start of area used for integer regs. */
4685 /* int_addr = gpr + sav; */
4686 t = fold_convert (ptr_type_node, gpr);
4687 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4688 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4689 gimplify_and_add (t, pre_p);
4693 /* sse_addr = fpr + sav; */
4694 t = fold_convert (ptr_type_node, fpr);
4695 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4696 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4697 gimplify_and_add (t, pre_p);
4702 tree temp = create_tmp_var (type, "va_arg_tmp");
4705 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4706 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4707 gimplify_and_add (t, pre_p);
4709 for (i = 0; i < XVECLEN (container, 0); i++)
4711 rtx slot = XVECEXP (container, 0, i);
4712 rtx reg = XEXP (slot, 0);
4713 enum machine_mode mode = GET_MODE (reg);
4714 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4715 tree addr_type = build_pointer_type (piece_type);
4718 tree dest_addr, dest;
4720 if (SSE_REGNO_P (REGNO (reg)))
4722 src_addr = sse_addr;
4723 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4727 src_addr = int_addr;
4728 src_offset = REGNO (reg) * 8;
4730 src_addr = fold_convert (addr_type, src_addr);
4731 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4732 size_int (src_offset)));
4733 src = build_va_arg_indirect_ref (src_addr);
4735 dest_addr = fold_convert (addr_type, addr);
4736 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4737 size_int (INTVAL (XEXP (slot, 1)))));
4738 dest = build_va_arg_indirect_ref (dest_addr);
4740 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4741 gimplify_and_add (t, pre_p);
4747 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4748 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4749 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4750 gimplify_and_add (t, pre_p);
4754 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4755 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4756 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4757 gimplify_and_add (t, pre_p);
4760 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4761 gimplify_and_add (t, pre_p);
4763 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4764 append_to_statement_list (t, pre_p);
4767 /* ... otherwise out of the overflow area. */
4769 /* Care for on-stack alignment if needed. */
4770 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4771 || integer_zerop (TYPE_SIZE (type)))
4775 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4776 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4777 build_int_cst (TREE_TYPE (ovf), align - 1));
4778 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4779 build_int_cst (TREE_TYPE (t), -align));
4781 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4783 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4784 gimplify_and_add (t2, pre_p);
4786 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4787 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4788 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4789 gimplify_and_add (t, pre_p);
4793 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4794 append_to_statement_list (t, pre_p);
4797 ptrtype = build_pointer_type (type);
4798 addr = fold_convert (ptrtype, addr);
4801 addr = build_va_arg_indirect_ref (addr);
4802 return build_va_arg_indirect_ref (addr);
4805 /* Return nonzero if OPNUM's MEM should be matched
4806 in movabs* patterns. */
4809 ix86_check_movabs (rtx insn, int opnum)
4813 set = PATTERN (insn);
4814 if (GET_CODE (set) == PARALLEL)
4815 set = XVECEXP (set, 0, 0);
4816 gcc_assert (GET_CODE (set) == SET);
4817 mem = XEXP (set, opnum);
4818 while (GET_CODE (mem) == SUBREG)
4819 mem = SUBREG_REG (mem);
4820 gcc_assert (GET_CODE (mem) == MEM);
4821 return (volatile_ok || !MEM_VOLATILE_P (mem));
4824 /* Initialize the table of extra 80387 mathematical constants. */
4827 init_ext_80387_constants (void)
4829 static const char * cst[5] =
4831 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4832 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4833 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4834 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4835 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4839 for (i = 0; i < 5; i++)
4841 real_from_string (&ext_80387_constants_table[i], cst[i]);
4842 /* Ensure each constant is rounded to XFmode precision. */
4843 real_convert (&ext_80387_constants_table[i],
4844 XFmode, &ext_80387_constants_table[i]);
4847 ext_80387_constants_init = 1;
4850 /* Return true if the constant is something that can be loaded with
4851 a special instruction. */
4854 standard_80387_constant_p (rtx x)
4858 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4861 if (x == CONST0_RTX (GET_MODE (x)))
4863 if (x == CONST1_RTX (GET_MODE (x)))
4866 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4868 /* For XFmode constants, try to find a special 80387 instruction when
4869 optimizing for size or on those CPUs that benefit from them. */
4870 if (GET_MODE (x) == XFmode
4871 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4875 if (! ext_80387_constants_init)
4876 init_ext_80387_constants ();
4878 for (i = 0; i < 5; i++)
4879 if (real_identical (&r, &ext_80387_constants_table[i]))
4883 /* Load of the constant -0.0 or -1.0 will be split as
4884 fldz;fchs or fld1;fchs sequence. */
4885 if (real_isnegzero (&r))
4887 if (real_identical (&r, &dconstm1))
4893 /* Return the opcode of the special instruction to be used to load
4897 standard_80387_constant_opcode (rtx x)
4899 switch (standard_80387_constant_p (x))
4923 /* Return the CONST_DOUBLE representing the 80387 constant that is
4924 loaded by the specified special instruction. The argument IDX
4925 matches the return value from standard_80387_constant_p. */
4928 standard_80387_constant_rtx (int idx)
4932 if (! ext_80387_constants_init)
4933 init_ext_80387_constants ();
4949 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4953 /* Return 1 if mode is a valid mode for sse. */
4955 standard_sse_mode_p (enum machine_mode mode)
4972 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4975 standard_sse_constant_p (rtx x)
4977 enum machine_mode mode = GET_MODE (x);
4979 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4981 if (vector_all_ones_operand (x, mode)
4982 && standard_sse_mode_p (mode))
4983 return TARGET_SSE2 ? 2 : -1;
4988 /* Return the opcode of the special instruction to be used to load
4992 standard_sse_constant_opcode (rtx insn, rtx x)
4994 switch (standard_sse_constant_p (x))
4997 if (get_attr_mode (insn) == MODE_V4SF)
4998 return "xorps\t%0, %0";
4999 else if (get_attr_mode (insn) == MODE_V2DF)
5000 return "xorpd\t%0, %0";
5002 return "pxor\t%0, %0";
5004 return "pcmpeqd\t%0, %0";
5009 /* Returns 1 if OP contains a symbol reference */
5012 symbolic_reference_mentioned_p (rtx op)
5017 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5020 fmt = GET_RTX_FORMAT (GET_CODE (op));
5021 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5027 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5028 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5032 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5039 /* Return 1 if it is appropriate to emit `ret' instructions in the
5040 body of a function. Do this only if the epilogue is simple, needing a
5041 couple of insns. Prior to reloading, we can't tell how many registers
5042 must be saved, so return 0 then. Return 0 if there is no frame
5043 marker to de-allocate. */
5046 ix86_can_use_return_insn_p (void)
5048 struct ix86_frame frame;
5050 if (! reload_completed || frame_pointer_needed)
5053 /* Don't allow more than 32 pop, since that's all we can do
5054 with one instruction. */
5055 if (current_function_pops_args
5056 && current_function_args_size >= 32768)
5059 ix86_compute_frame_layout (&frame);
5060 return frame.to_allocate == 0 && frame.nregs == 0;
5063 /* Value should be nonzero if functions must have frame pointers.
5064 Zero means the frame pointer need not be set up (and parms may
5065 be accessed via the stack pointer) in functions that seem suitable. */
5068 ix86_frame_pointer_required (void)
5070 /* If we accessed previous frames, then the generated code expects
5071 to be able to access the saved ebp value in our frame. */
5072 if (cfun->machine->accesses_prev_frame)
5075 /* Several x86 os'es need a frame pointer for other reasons,
5076 usually pertaining to setjmp. */
5077 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5080 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5081 the frame pointer by default. Turn it back on now if we've not
5082 got a leaf function. */
5083 if (TARGET_OMIT_LEAF_FRAME_POINTER
5084 && (!current_function_is_leaf
5085 || ix86_current_function_calls_tls_descriptor))
5088 if (current_function_profile)
5094 /* Record that the current function accesses previous call frames. */
5097 ix86_setup_frame_addresses (void)
5099 cfun->machine->accesses_prev_frame = 1;
5102 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5103 # define USE_HIDDEN_LINKONCE 1
5105 # define USE_HIDDEN_LINKONCE 0
5108 static int pic_labels_used;
5110 /* Fills in the label name that should be used for a pc thunk for
5111 the given register. */
5114 get_pc_thunk_name (char name[32], unsigned int regno)
5116 gcc_assert (!TARGET_64BIT);
5118 if (USE_HIDDEN_LINKONCE)
5119 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5121 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5125 /* This function generates code for -fpic that loads %ebx with
5126 the return address of the caller and then returns. */
5129 ix86_file_end (void)
5134 for (regno = 0; regno < 8; ++regno)
5138 if (! ((pic_labels_used >> regno) & 1))
5141 get_pc_thunk_name (name, regno);
5146 switch_to_section (darwin_sections[text_coal_section]);
5147 fputs ("\t.weak_definition\t", asm_out_file);
5148 assemble_name (asm_out_file, name);
5149 fputs ("\n\t.private_extern\t", asm_out_file);
5150 assemble_name (asm_out_file, name);
5151 fputs ("\n", asm_out_file);
5152 ASM_OUTPUT_LABEL (asm_out_file, name);
5156 if (USE_HIDDEN_LINKONCE)
5160 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5162 TREE_PUBLIC (decl) = 1;
5163 TREE_STATIC (decl) = 1;
5164 DECL_ONE_ONLY (decl) = 1;
5166 (*targetm.asm_out.unique_section) (decl, 0);
5167 switch_to_section (get_named_section (decl, NULL, 0));
5169 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5170 fputs ("\t.hidden\t", asm_out_file);
5171 assemble_name (asm_out_file, name);
5172 fputc ('\n', asm_out_file);
5173 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5177 switch_to_section (text_section);
5178 ASM_OUTPUT_LABEL (asm_out_file, name);
5181 xops[0] = gen_rtx_REG (SImode, regno);
5182 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5183 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5184 output_asm_insn ("ret", xops);
5187 if (NEED_INDICATE_EXEC_STACK)
5188 file_end_indicate_exec_stack ();
5191 /* Emit code for the SET_GOT patterns. */
5194 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5199 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5201 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5203 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5206 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5208 output_asm_insn ("call\t%a2", xops);
5211 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5212 is what will be referenced by the Mach-O PIC subsystem. */
5214 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5217 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5218 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5221 output_asm_insn ("pop{l}\t%0", xops);
5226 get_pc_thunk_name (name, REGNO (dest));
5227 pic_labels_used |= 1 << REGNO (dest);
5229 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5230 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5231 output_asm_insn ("call\t%X2", xops);
5232 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5233 is what will be referenced by the Mach-O PIC subsystem. */
5236 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5238 targetm.asm_out.internal_label (asm_out_file, "L",
5239 CODE_LABEL_NUMBER (label));
5246 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5247 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5249 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5254 /* Generate an "push" pattern for input ARG. */
5259 return gen_rtx_SET (VOIDmode,
5261 gen_rtx_PRE_DEC (Pmode,
5262 stack_pointer_rtx)),
5266 /* Return >= 0 if there is an unused call-clobbered register available
5267 for the entire function. */
5270 ix86_select_alt_pic_regnum (void)
5272 if (current_function_is_leaf && !current_function_profile
5273 && !ix86_current_function_calls_tls_descriptor)
5276 for (i = 2; i >= 0; --i)
5277 if (!regs_ever_live[i])
5281 return INVALID_REGNUM;
5284 /* Return 1 if we need to save REGNO. */
5286 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5288 if (pic_offset_table_rtx
5289 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5290 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5291 || current_function_profile
5292 || current_function_calls_eh_return
5293 || current_function_uses_const_pool))
5295 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5300 if (current_function_calls_eh_return && maybe_eh_return)
5305 unsigned test = EH_RETURN_DATA_REGNO (i);
5306 if (test == INVALID_REGNUM)
5313 if (cfun->machine->force_align_arg_pointer
5314 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5317 return (regs_ever_live[regno]
5318 && !call_used_regs[regno]
5319 && !fixed_regs[regno]
5320 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5323 /* Return number of registers to be saved on the stack. */
5326 ix86_nsaved_regs (void)
5331 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5332 if (ix86_save_reg (regno, true))
5337 /* Return the offset between two registers, one to be eliminated, and the other
5338 its replacement, at the start of a routine. */
5341 ix86_initial_elimination_offset (int from, int to)
5343 struct ix86_frame frame;
5344 ix86_compute_frame_layout (&frame);
5346 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5347 return frame.hard_frame_pointer_offset;
5348 else if (from == FRAME_POINTER_REGNUM
5349 && to == HARD_FRAME_POINTER_REGNUM)
5350 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5353 gcc_assert (to == STACK_POINTER_REGNUM);
5355 if (from == ARG_POINTER_REGNUM)
5356 return frame.stack_pointer_offset;
5358 gcc_assert (from == FRAME_POINTER_REGNUM);
5359 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5363 /* Fill structure ix86_frame about frame of currently computed function. */
5366 ix86_compute_frame_layout (struct ix86_frame *frame)
5368 HOST_WIDE_INT total_size;
5369 unsigned int stack_alignment_needed;
5370 HOST_WIDE_INT offset;
5371 unsigned int preferred_alignment;
5372 HOST_WIDE_INT size = get_frame_size ();
5374 frame->nregs = ix86_nsaved_regs ();
5377 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5378 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5380 /* During reload iteration the amount of registers saved can change.
5381 Recompute the value as needed. Do not recompute when amount of registers
5382 didn't change as reload does multiple calls to the function and does not
5383 expect the decision to change within single iteration. */
5385 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5387 int count = frame->nregs;
5389 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5390 /* The fast prologue uses move instead of push to save registers. This
5391 is significantly longer, but also executes faster as modern hardware
5392 can execute the moves in parallel, but can't do that for push/pop.
5394 Be careful about choosing what prologue to emit: When function takes
5395 many instructions to execute we may use slow version as well as in
5396 case function is known to be outside hot spot (this is known with
5397 feedback only). Weight the size of function by number of registers
5398 to save as it is cheap to use one or two push instructions but very
5399 slow to use many of them. */
5401 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5402 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5403 || (flag_branch_probabilities
5404 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5405 cfun->machine->use_fast_prologue_epilogue = false;
5407 cfun->machine->use_fast_prologue_epilogue
5408 = !expensive_function_p (count);
5410 if (TARGET_PROLOGUE_USING_MOVE
5411 && cfun->machine->use_fast_prologue_epilogue)
5412 frame->save_regs_using_mov = true;
5414 frame->save_regs_using_mov = false;
5417 /* Skip return address and saved base pointer. */
5418 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5420 frame->hard_frame_pointer_offset = offset;
5422 /* Do some sanity checking of stack_alignment_needed and
5423 preferred_alignment, since i386 port is the only using those features
5424 that may break easily. */
5426 gcc_assert (!size || stack_alignment_needed);
5427 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5428 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5429 gcc_assert (stack_alignment_needed
5430 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5432 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5433 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5435 /* Register save area */
5436 offset += frame->nregs * UNITS_PER_WORD;
5439 if (ix86_save_varrargs_registers)
5441 offset += X86_64_VARARGS_SIZE;
5442 frame->va_arg_size = X86_64_VARARGS_SIZE;
5445 frame->va_arg_size = 0;
5447 /* Align start of frame for local function. */
5448 frame->padding1 = ((offset + stack_alignment_needed - 1)
5449 & -stack_alignment_needed) - offset;
5451 offset += frame->padding1;
5453 /* Frame pointer points here. */
5454 frame->frame_pointer_offset = offset;
5458 /* Add outgoing arguments area. Can be skipped if we eliminated
5459 all the function calls as dead code.
5460 Skipping is however impossible when function calls alloca. Alloca
5461 expander assumes that last current_function_outgoing_args_size
5462 of stack frame are unused. */
5463 if (ACCUMULATE_OUTGOING_ARGS
5464 && (!current_function_is_leaf || current_function_calls_alloca
5465 || ix86_current_function_calls_tls_descriptor))
5467 offset += current_function_outgoing_args_size;
5468 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5471 frame->outgoing_arguments_size = 0;
5473 /* Align stack boundary. Only needed if we're calling another function
5475 if (!current_function_is_leaf || current_function_calls_alloca
5476 || ix86_current_function_calls_tls_descriptor)
5477 frame->padding2 = ((offset + preferred_alignment - 1)
5478 & -preferred_alignment) - offset;
5480 frame->padding2 = 0;
5482 offset += frame->padding2;
5484 /* We've reached end of stack frame. */
5485 frame->stack_pointer_offset = offset;
5487 /* Size prologue needs to allocate. */
5488 frame->to_allocate =
5489 (size + frame->padding1 + frame->padding2
5490 + frame->outgoing_arguments_size + frame->va_arg_size);
5492 if ((!frame->to_allocate && frame->nregs <= 1)
5493 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5494 frame->save_regs_using_mov = false;
5496 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5497 && current_function_is_leaf
5498 && !ix86_current_function_calls_tls_descriptor)
5500 frame->red_zone_size = frame->to_allocate;
5501 if (frame->save_regs_using_mov)
5502 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5503 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5504 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5507 frame->red_zone_size = 0;
5508 frame->to_allocate -= frame->red_zone_size;
5509 frame->stack_pointer_offset -= frame->red_zone_size;
5511 fprintf (stderr, "nregs: %i\n", frame->nregs);
5512 fprintf (stderr, "size: %i\n", size);
5513 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5514 fprintf (stderr, "padding1: %i\n", frame->padding1);
5515 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5516 fprintf (stderr, "padding2: %i\n", frame->padding2);
5517 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5518 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5519 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5520 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5521 frame->hard_frame_pointer_offset);
5522 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5526 /* Emit code to save registers in the prologue. */
5529 ix86_emit_save_regs (void)
5534 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5535 if (ix86_save_reg (regno, true))
5537 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5538 RTX_FRAME_RELATED_P (insn) = 1;
5542 /* Emit code to save registers using MOV insns. First register
5543 is restored from POINTER + OFFSET. */
5545 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5550 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5551 if (ix86_save_reg (regno, true))
5553 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5555 gen_rtx_REG (Pmode, regno));
5556 RTX_FRAME_RELATED_P (insn) = 1;
5557 offset += UNITS_PER_WORD;
5561 /* Expand prologue or epilogue stack adjustment.
5562 The pattern exist to put a dependency on all ebp-based memory accesses.
5563 STYLE should be negative if instructions should be marked as frame related,
5564 zero if %r11 register is live and cannot be freely used and positive
5568 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5573 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5574 else if (x86_64_immediate_operand (offset, DImode))
5575 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5579 /* r11 is used by indirect sibcall return as well, set before the
5580 epilogue and used after the epilogue. ATM indirect sibcall
5581 shouldn't be used together with huge frame sizes in one
5582 function because of the frame_size check in sibcall.c. */
5584 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5585 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5587 RTX_FRAME_RELATED_P (insn) = 1;
5588 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5592 RTX_FRAME_RELATED_P (insn) = 1;
5595 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5598 ix86_internal_arg_pointer (void)
5600 bool has_force_align_arg_pointer =
5601 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5602 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5603 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5604 && DECL_NAME (current_function_decl)
5605 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5606 && DECL_FILE_SCOPE_P (current_function_decl))
5607 || ix86_force_align_arg_pointer
5608 || has_force_align_arg_pointer)
5610 /* Nested functions can't realign the stack due to a register
5612 if (DECL_CONTEXT (current_function_decl)
5613 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5615 if (ix86_force_align_arg_pointer)
5616 warning (0, "-mstackrealign ignored for nested functions");
5617 if (has_force_align_arg_pointer)
5618 error ("%s not supported for nested functions",
5619 ix86_force_align_arg_pointer_string);
5620 return virtual_incoming_args_rtx;
5622 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5623 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5626 return virtual_incoming_args_rtx;
5629 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5630 This is called from dwarf2out.c to emit call frame instructions
5631 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5633 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5635 rtx unspec = SET_SRC (pattern);
5636 gcc_assert (GET_CODE (unspec) == UNSPEC);
5640 case UNSPEC_REG_SAVE:
5641 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5642 SET_DEST (pattern));
5644 case UNSPEC_DEF_CFA:
5645 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5646 INTVAL (XVECEXP (unspec, 0, 0)));
5653 /* Expand the prologue into a bunch of separate insns. */
5656 ix86_expand_prologue (void)
5660 struct ix86_frame frame;
5661 HOST_WIDE_INT allocate;
5663 ix86_compute_frame_layout (&frame);
5665 if (cfun->machine->force_align_arg_pointer)
5669 /* Grab the argument pointer. */
5670 x = plus_constant (stack_pointer_rtx, 4);
5671 y = cfun->machine->force_align_arg_pointer;
5672 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5673 RTX_FRAME_RELATED_P (insn) = 1;
5675 /* The unwind info consists of two parts: install the fafp as the cfa,
5676 and record the fafp as the "save register" of the stack pointer.
5677 The later is there in order that the unwinder can see where it
5678 should restore the stack pointer across the and insn. */
5679 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5680 x = gen_rtx_SET (VOIDmode, y, x);
5681 RTX_FRAME_RELATED_P (x) = 1;
5682 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5684 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5685 RTX_FRAME_RELATED_P (y) = 1;
5686 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5687 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5688 REG_NOTES (insn) = x;
5690 /* Align the stack. */
5691 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5694 /* And here we cheat like madmen with the unwind info. We force the
5695 cfa register back to sp+4, which is exactly what it was at the
5696 start of the function. Re-pushing the return address results in
5697 the return at the same spot relative to the cfa, and thus is
5698 correct wrt the unwind info. */
5699 x = cfun->machine->force_align_arg_pointer;
5700 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5701 insn = emit_insn (gen_push (x));
5702 RTX_FRAME_RELATED_P (insn) = 1;
5705 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5706 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5707 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5708 REG_NOTES (insn) = x;
5711 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5712 slower on all targets. Also sdb doesn't like it. */
5714 if (frame_pointer_needed)
5716 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5717 RTX_FRAME_RELATED_P (insn) = 1;
5719 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5720 RTX_FRAME_RELATED_P (insn) = 1;
5723 allocate = frame.to_allocate;
5725 if (!frame.save_regs_using_mov)
5726 ix86_emit_save_regs ();
5728 allocate += frame.nregs * UNITS_PER_WORD;
5730 /* When using red zone we may start register saving before allocating
5731 the stack frame saving one cycle of the prologue. */
5732 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5733 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5734 : stack_pointer_rtx,
5735 -frame.nregs * UNITS_PER_WORD);
5739 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5740 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5741 GEN_INT (-allocate), -1);
5744 /* Only valid for Win32. */
5745 rtx eax = gen_rtx_REG (SImode, 0);
5746 bool eax_live = ix86_eax_live_at_start_p ();
5749 gcc_assert (!TARGET_64BIT);
5753 emit_insn (gen_push (eax));
5757 emit_move_insn (eax, GEN_INT (allocate));
5759 insn = emit_insn (gen_allocate_stack_worker (eax));
5760 RTX_FRAME_RELATED_P (insn) = 1;
5761 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5762 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5763 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5764 t, REG_NOTES (insn));
5768 if (frame_pointer_needed)
5769 t = plus_constant (hard_frame_pointer_rtx,
5772 - frame.nregs * UNITS_PER_WORD);
5774 t = plus_constant (stack_pointer_rtx, allocate);
5775 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5779 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5781 if (!frame_pointer_needed || !frame.to_allocate)
5782 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5784 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5785 -frame.nregs * UNITS_PER_WORD);
5788 pic_reg_used = false;
5789 if (pic_offset_table_rtx
5790 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5791 || current_function_profile))
5793 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5795 if (alt_pic_reg_used != INVALID_REGNUM)
5796 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5798 pic_reg_used = true;
5804 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5806 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5808 /* Even with accurate pre-reload life analysis, we can wind up
5809 deleting all references to the pic register after reload.
5810 Consider if cross-jumping unifies two sides of a branch
5811 controlled by a comparison vs the only read from a global.
5812 In which case, allow the set_got to be deleted, though we're
5813 too late to do anything about the ebx save in the prologue. */
5814 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5817 /* Prevent function calls from be scheduled before the call to mcount.
5818 In the pic_reg_used case, make sure that the got load isn't deleted. */
5819 if (current_function_profile)
5820 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5823 /* Emit code to restore saved registers using MOV insns. First register
5824 is restored from POINTER + OFFSET. */
5826 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5827 int maybe_eh_return)
5830 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5832 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5833 if (ix86_save_reg (regno, maybe_eh_return))
5835 /* Ensure that adjust_address won't be forced to produce pointer
5836 out of range allowed by x86-64 instruction set. */
5837 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5841 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5842 emit_move_insn (r11, GEN_INT (offset));
5843 emit_insn (gen_adddi3 (r11, r11, pointer));
5844 base_address = gen_rtx_MEM (Pmode, r11);
5847 emit_move_insn (gen_rtx_REG (Pmode, regno),
5848 adjust_address (base_address, Pmode, offset));
5849 offset += UNITS_PER_WORD;
5853 /* Restore function stack, frame, and registers. */
5856 ix86_expand_epilogue (int style)
5859 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5860 struct ix86_frame frame;
5861 HOST_WIDE_INT offset;
5863 ix86_compute_frame_layout (&frame);
5865 /* Calculate start of saved registers relative to ebp. Special care
5866 must be taken for the normal return case of a function using
5867 eh_return: the eax and edx registers are marked as saved, but not
5868 restored along this path. */
5869 offset = frame.nregs;
5870 if (current_function_calls_eh_return && style != 2)
5872 offset *= -UNITS_PER_WORD;
5874 /* If we're only restoring one register and sp is not valid then
5875 using a move instruction to restore the register since it's
5876 less work than reloading sp and popping the register.
5878 The default code result in stack adjustment using add/lea instruction,
5879 while this code results in LEAVE instruction (or discrete equivalent),
5880 so it is profitable in some other cases as well. Especially when there
5881 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5882 and there is exactly one register to pop. This heuristic may need some
5883 tuning in future. */
5884 if ((!sp_valid && frame.nregs <= 1)
5885 || (TARGET_EPILOGUE_USING_MOVE
5886 && cfun->machine->use_fast_prologue_epilogue
5887 && (frame.nregs > 1 || frame.to_allocate))
5888 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5889 || (frame_pointer_needed && TARGET_USE_LEAVE
5890 && cfun->machine->use_fast_prologue_epilogue
5891 && frame.nregs == 1)
5892 || current_function_calls_eh_return)
5894 /* Restore registers. We can use ebp or esp to address the memory
5895 locations. If both are available, default to ebp, since offsets
5896 are known to be small. Only exception is esp pointing directly to the
5897 end of block of saved registers, where we may simplify addressing
5900 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5901 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5902 frame.to_allocate, style == 2);
5904 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5905 offset, style == 2);
5907 /* eh_return epilogues need %ecx added to the stack pointer. */
5910 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5912 if (frame_pointer_needed)
5914 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5915 tmp = plus_constant (tmp, UNITS_PER_WORD);
5916 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5918 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5919 emit_move_insn (hard_frame_pointer_rtx, tmp);
5921 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5926 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5927 tmp = plus_constant (tmp, (frame.to_allocate
5928 + frame.nregs * UNITS_PER_WORD));
5929 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5932 else if (!frame_pointer_needed)
5933 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5934 GEN_INT (frame.to_allocate
5935 + frame.nregs * UNITS_PER_WORD),
5937 /* If not an i386, mov & pop is faster than "leave". */
5938 else if (TARGET_USE_LEAVE || optimize_size
5939 || !cfun->machine->use_fast_prologue_epilogue)
5940 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5943 pro_epilogue_adjust_stack (stack_pointer_rtx,
5944 hard_frame_pointer_rtx,
5947 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5949 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5954 /* First step is to deallocate the stack frame so that we can
5955 pop the registers. */
5958 gcc_assert (frame_pointer_needed);
5959 pro_epilogue_adjust_stack (stack_pointer_rtx,
5960 hard_frame_pointer_rtx,
5961 GEN_INT (offset), style);
5963 else if (frame.to_allocate)
5964 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5965 GEN_INT (frame.to_allocate), style);
5967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5968 if (ix86_save_reg (regno, false))
5971 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5973 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5975 if (frame_pointer_needed)
5977 /* Leave results in shorter dependency chains on CPUs that are
5978 able to grok it fast. */
5979 if (TARGET_USE_LEAVE)
5980 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5981 else if (TARGET_64BIT)
5982 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5984 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5988 if (cfun->machine->force_align_arg_pointer)
5990 emit_insn (gen_addsi3 (stack_pointer_rtx,
5991 cfun->machine->force_align_arg_pointer,
5995 /* Sibcall epilogues don't want a return instruction. */
5999 if (current_function_pops_args && current_function_args_size)
6001 rtx popc = GEN_INT (current_function_pops_args);
6003 /* i386 can only pop 64K bytes. If asked to pop more, pop
6004 return address, do explicit add, and jump indirectly to the
6007 if (current_function_pops_args >= 65536)
6009 rtx ecx = gen_rtx_REG (SImode, 2);
6011 /* There is no "pascal" calling convention in 64bit ABI. */
6012 gcc_assert (!TARGET_64BIT);
6014 emit_insn (gen_popsi1 (ecx));
6015 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6016 emit_jump_insn (gen_return_indirect_internal (ecx));
6019 emit_jump_insn (gen_return_pop_internal (popc));
6022 emit_jump_insn (gen_return_internal ());
6025 /* Reset from the function's potential modifications. */
6028 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6029 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6031 if (pic_offset_table_rtx)
6032 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6034 /* Mach-O doesn't support labels at the end of objects, so if
6035 it looks like we might want one, insert a NOP. */
6037 rtx insn = get_last_insn ();
6040 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6041 insn = PREV_INSN (insn);
6045 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6046 fputs ("\tnop\n", file);
6052 /* Extract the parts of an RTL expression that is a valid memory address
6053 for an instruction. Return 0 if the structure of the address is
6054 grossly off. Return -1 if the address contains ASHIFT, so it is not
6055 strictly valid, but still used for computing length of lea instruction. */
6058 ix86_decompose_address (rtx addr, struct ix86_address *out)
6060 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6061 rtx base_reg, index_reg;
6062 HOST_WIDE_INT scale = 1;
6063 rtx scale_rtx = NULL_RTX;
6065 enum ix86_address_seg seg = SEG_DEFAULT;
6067 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6069 else if (GET_CODE (addr) == PLUS)
6079 addends[n++] = XEXP (op, 1);
6082 while (GET_CODE (op) == PLUS);
6087 for (i = n; i >= 0; --i)
6090 switch (GET_CODE (op))
6095 index = XEXP (op, 0);
6096 scale_rtx = XEXP (op, 1);
6100 if (XINT (op, 1) == UNSPEC_TP
6101 && TARGET_TLS_DIRECT_SEG_REFS
6102 && seg == SEG_DEFAULT)
6103 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6132 else if (GET_CODE (addr) == MULT)
6134 index = XEXP (addr, 0); /* index*scale */
6135 scale_rtx = XEXP (addr, 1);
6137 else if (GET_CODE (addr) == ASHIFT)
6141 /* We're called for lea too, which implements ashift on occasion. */
6142 index = XEXP (addr, 0);
6143 tmp = XEXP (addr, 1);
6144 if (GET_CODE (tmp) != CONST_INT)
6146 scale = INTVAL (tmp);
6147 if ((unsigned HOST_WIDE_INT) scale > 3)
6153 disp = addr; /* displacement */
6155 /* Extract the integral value of scale. */
6158 if (GET_CODE (scale_rtx) != CONST_INT)
6160 scale = INTVAL (scale_rtx);
6163 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6164 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6166 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6167 if (base_reg && index_reg && scale == 1
6168 && (index_reg == arg_pointer_rtx
6169 || index_reg == frame_pointer_rtx
6170 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6173 tmp = base, base = index, index = tmp;
6174 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6177 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6178 if ((base_reg == hard_frame_pointer_rtx
6179 || base_reg == frame_pointer_rtx
6180 || base_reg == arg_pointer_rtx) && !disp)
6183 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6184 Avoid this by transforming to [%esi+0]. */
6185 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6186 && base_reg && !index_reg && !disp
6188 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6191 /* Special case: encode reg+reg instead of reg*2. */
6192 if (!base && index && scale && scale == 2)
6193 base = index, base_reg = index_reg, scale = 1;
6195 /* Special case: scaling cannot be encoded without base or displacement. */
6196 if (!base && !disp && index && scale != 1)
6208 /* Return cost of the memory address x.
6209 For i386, it is better to use a complex address than let gcc copy
6210 the address into a reg and make a new pseudo. But not if the address
6211 requires to two regs - that would mean more pseudos with longer
6214 ix86_address_cost (rtx x)
6216 struct ix86_address parts;
6218 int ok = ix86_decompose_address (x, &parts);
6222 if (parts.base && GET_CODE (parts.base) == SUBREG)
6223 parts.base = SUBREG_REG (parts.base);
6224 if (parts.index && GET_CODE (parts.index) == SUBREG)
6225 parts.index = SUBREG_REG (parts.index);
6227 /* More complex memory references are better. */
6228 if (parts.disp && parts.disp != const0_rtx)
6230 if (parts.seg != SEG_DEFAULT)
6233 /* Attempt to minimize number of registers in the address. */
6235 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6237 && (!REG_P (parts.index)
6238 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6242 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6244 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6245 && parts.base != parts.index)
6248 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6249 since it's predecode logic can't detect the length of instructions
6250 and it degenerates to vector decoded. Increase cost of such
6251 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6252 to split such addresses or even refuse such addresses at all.
6254 Following addressing modes are affected:
6259 The first and last case may be avoidable by explicitly coding the zero in
6260 memory address, but I don't have AMD-K6 machine handy to check this
6264 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6265 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6266 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6272 /* If X is a machine specific address (i.e. a symbol or label being
6273 referenced as a displacement from the GOT implemented using an
6274 UNSPEC), then return the base term. Otherwise return X. */
6277 ix86_find_base_term (rtx x)
6283 if (GET_CODE (x) != CONST)
6286 if (GET_CODE (term) == PLUS
6287 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6288 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6289 term = XEXP (term, 0);
6290 if (GET_CODE (term) != UNSPEC
6291 || XINT (term, 1) != UNSPEC_GOTPCREL)
6294 term = XVECEXP (term, 0, 0);
6296 if (GET_CODE (term) != SYMBOL_REF
6297 && GET_CODE (term) != LABEL_REF)
6303 term = ix86_delegitimize_address (x);
6305 if (GET_CODE (term) != SYMBOL_REF
6306 && GET_CODE (term) != LABEL_REF)
6312 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6313 this is used for to form addresses to local data when -fPIC is in
6317 darwin_local_data_pic (rtx disp)
6319 if (GET_CODE (disp) == MINUS)
6321 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6322 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6323 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6325 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6326 if (! strcmp (sym_name, "<pic base>"))
6334 /* Determine if a given RTX is a valid constant. We already know this
6335 satisfies CONSTANT_P. */
6338 legitimate_constant_p (rtx x)
6340 switch (GET_CODE (x))
6345 if (GET_CODE (x) == PLUS)
6347 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6352 if (TARGET_MACHO && darwin_local_data_pic (x))
6355 /* Only some unspecs are valid as "constants". */
6356 if (GET_CODE (x) == UNSPEC)
6357 switch (XINT (x, 1))
6360 return TARGET_64BIT;
6363 x = XVECEXP (x, 0, 0);
6364 return (GET_CODE (x) == SYMBOL_REF
6365 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6367 x = XVECEXP (x, 0, 0);
6368 return (GET_CODE (x) == SYMBOL_REF
6369 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6374 /* We must have drilled down to a symbol. */
6375 if (GET_CODE (x) == LABEL_REF)
6377 if (GET_CODE (x) != SYMBOL_REF)
6382 /* TLS symbols are never valid. */
6383 if (SYMBOL_REF_TLS_MODEL (x))
6388 if (GET_MODE (x) == TImode
6389 && x != CONST0_RTX (TImode)
6395 if (x == CONST0_RTX (GET_MODE (x)))
6403 /* Otherwise we handle everything else in the move patterns. */
6407 /* Determine if it's legal to put X into the constant pool. This
6408 is not possible for the address of thread-local symbols, which
6409 is checked above. */
6412 ix86_cannot_force_const_mem (rtx x)
6414 /* We can always put integral constants and vectors in memory. */
6415 switch (GET_CODE (x))
6425 return !legitimate_constant_p (x);
6428 /* Determine if a given RTX is a valid constant address. */
6431 constant_address_p (rtx x)
6433 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6436 /* Nonzero if the constant value X is a legitimate general operand
6437 when generating PIC code. It is given that flag_pic is on and
6438 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6441 legitimate_pic_operand_p (rtx x)
6445 switch (GET_CODE (x))
6448 inner = XEXP (x, 0);
6449 if (GET_CODE (inner) == PLUS
6450 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6451 inner = XEXP (inner, 0);
6453 /* Only some unspecs are valid as "constants". */
6454 if (GET_CODE (inner) == UNSPEC)
6455 switch (XINT (inner, 1))
6458 return TARGET_64BIT;
6460 x = XVECEXP (inner, 0, 0);
6461 return (GET_CODE (x) == SYMBOL_REF
6462 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6470 return legitimate_pic_address_disp_p (x);
6477 /* Determine if a given CONST RTX is a valid memory displacement
6481 legitimate_pic_address_disp_p (rtx disp)
6485 /* In 64bit mode we can allow direct addresses of symbols and labels
6486 when they are not dynamic symbols. */
6489 rtx op0 = disp, op1;
6491 switch (GET_CODE (disp))
6497 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6499 op0 = XEXP (XEXP (disp, 0), 0);
6500 op1 = XEXP (XEXP (disp, 0), 1);
6501 if (GET_CODE (op1) != CONST_INT
6502 || INTVAL (op1) >= 16*1024*1024
6503 || INTVAL (op1) < -16*1024*1024)
6505 if (GET_CODE (op0) == LABEL_REF)
6507 if (GET_CODE (op0) != SYMBOL_REF)
6512 /* TLS references should always be enclosed in UNSPEC. */
6513 if (SYMBOL_REF_TLS_MODEL (op0))
6515 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6523 if (GET_CODE (disp) != CONST)
6525 disp = XEXP (disp, 0);
6529 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6530 of GOT tables. We should not need these anyway. */
6531 if (GET_CODE (disp) != UNSPEC
6532 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6533 && XINT (disp, 1) != UNSPEC_GOTOFF))
6536 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6537 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6543 if (GET_CODE (disp) == PLUS)
6545 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6547 disp = XEXP (disp, 0);
6551 if (TARGET_MACHO && darwin_local_data_pic (disp))
6554 if (GET_CODE (disp) != UNSPEC)
6557 switch (XINT (disp, 1))
6562 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6564 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6565 While ABI specify also 32bit relocation but we don't produce it in
6566 small PIC model at all. */
6567 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6568 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6570 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6572 case UNSPEC_GOTTPOFF:
6573 case UNSPEC_GOTNTPOFF:
6574 case UNSPEC_INDNTPOFF:
6577 disp = XVECEXP (disp, 0, 0);
6578 return (GET_CODE (disp) == SYMBOL_REF
6579 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6581 disp = XVECEXP (disp, 0, 0);
6582 return (GET_CODE (disp) == SYMBOL_REF
6583 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6585 disp = XVECEXP (disp, 0, 0);
6586 return (GET_CODE (disp) == SYMBOL_REF
6587 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6593 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6594 memory address for an instruction. The MODE argument is the machine mode
6595 for the MEM expression that wants to use this address.
6597 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6598 convert common non-canonical forms to canonical form so that they will
6602 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6604 struct ix86_address parts;
6605 rtx base, index, disp;
6606 HOST_WIDE_INT scale;
6607 const char *reason = NULL;
6608 rtx reason_rtx = NULL_RTX;
6610 if (TARGET_DEBUG_ADDR)
6613 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6614 GET_MODE_NAME (mode), strict);
6618 if (ix86_decompose_address (addr, &parts) <= 0)
6620 reason = "decomposition failed";
6625 index = parts.index;
6627 scale = parts.scale;
6629 /* Validate base register.
6631 Don't allow SUBREG's that span more than a word here. It can lead to spill
6632 failures when the base is one word out of a two word structure, which is
6633 represented internally as a DImode int. */
6642 else if (GET_CODE (base) == SUBREG
6643 && REG_P (SUBREG_REG (base))
6644 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6646 reg = SUBREG_REG (base);
6649 reason = "base is not a register";
6653 if (GET_MODE (base) != Pmode)
6655 reason = "base is not in Pmode";
6659 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6660 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6662 reason = "base is not valid";
6667 /* Validate index register.
6669 Don't allow SUBREG's that span more than a word here -- same as above. */
6678 else if (GET_CODE (index) == SUBREG
6679 && REG_P (SUBREG_REG (index))
6680 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6682 reg = SUBREG_REG (index);
6685 reason = "index is not a register";
6689 if (GET_MODE (index) != Pmode)
6691 reason = "index is not in Pmode";
6695 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6696 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6698 reason = "index is not valid";
6703 /* Validate scale factor. */
6706 reason_rtx = GEN_INT (scale);
6709 reason = "scale without index";
6713 if (scale != 2 && scale != 4 && scale != 8)
6715 reason = "scale is not a valid multiplier";
6720 /* Validate displacement. */
6725 if (GET_CODE (disp) == CONST
6726 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6727 switch (XINT (XEXP (disp, 0), 1))
6729 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6730 used. While ABI specify also 32bit relocations, we don't produce
6731 them at all and use IP relative instead. */
6734 gcc_assert (flag_pic);
6736 goto is_legitimate_pic;
6737 reason = "64bit address unspec";
6740 case UNSPEC_GOTPCREL:
6741 gcc_assert (flag_pic);
6742 goto is_legitimate_pic;
6744 case UNSPEC_GOTTPOFF:
6745 case UNSPEC_GOTNTPOFF:
6746 case UNSPEC_INDNTPOFF:
6752 reason = "invalid address unspec";
6756 else if (SYMBOLIC_CONST (disp)
6760 && MACHOPIC_INDIRECT
6761 && !machopic_operand_p (disp)
6767 if (TARGET_64BIT && (index || base))
6769 /* foo@dtpoff(%rX) is ok. */
6770 if (GET_CODE (disp) != CONST
6771 || GET_CODE (XEXP (disp, 0)) != PLUS
6772 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6773 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6774 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6775 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6777 reason = "non-constant pic memory reference";
6781 else if (! legitimate_pic_address_disp_p (disp))
6783 reason = "displacement is an invalid pic construct";
6787 /* This code used to verify that a symbolic pic displacement
6788 includes the pic_offset_table_rtx register.
6790 While this is good idea, unfortunately these constructs may
6791 be created by "adds using lea" optimization for incorrect
6800 This code is nonsensical, but results in addressing
6801 GOT table with pic_offset_table_rtx base. We can't
6802 just refuse it easily, since it gets matched by
6803 "addsi3" pattern, that later gets split to lea in the
6804 case output register differs from input. While this
6805 can be handled by separate addsi pattern for this case
6806 that never results in lea, this seems to be easier and
6807 correct fix for crash to disable this test. */
6809 else if (GET_CODE (disp) != LABEL_REF
6810 && GET_CODE (disp) != CONST_INT
6811 && (GET_CODE (disp) != CONST
6812 || !legitimate_constant_p (disp))
6813 && (GET_CODE (disp) != SYMBOL_REF
6814 || !legitimate_constant_p (disp)))
6816 reason = "displacement is not constant";
6819 else if (TARGET_64BIT
6820 && !x86_64_immediate_operand (disp, VOIDmode))
6822 reason = "displacement is out of range";
6827 /* Everything looks valid. */
6828 if (TARGET_DEBUG_ADDR)
6829 fprintf (stderr, "Success.\n");
6833 if (TARGET_DEBUG_ADDR)
6835 fprintf (stderr, "Error: %s\n", reason);
6836 debug_rtx (reason_rtx);
6841 /* Return a unique alias set for the GOT. */
6843 static HOST_WIDE_INT
6844 ix86_GOT_alias_set (void)
6846 static HOST_WIDE_INT set = -1;
6848 set = new_alias_set ();
6852 /* Return a legitimate reference for ORIG (an address) using the
6853 register REG. If REG is 0, a new pseudo is generated.
6855 There are two types of references that must be handled:
6857 1. Global data references must load the address from the GOT, via
6858 the PIC reg. An insn is emitted to do this load, and the reg is
6861 2. Static data references, constant pool addresses, and code labels
6862 compute the address as an offset from the GOT, whose base is in
6863 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6864 differentiate them from global data objects. The returned
6865 address is the PIC reg + an unspec constant.
6867 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6868 reg also appears in the address. */
6871 legitimize_pic_address (rtx orig, rtx reg)
6878 if (TARGET_MACHO && !TARGET_64BIT)
6881 reg = gen_reg_rtx (Pmode);
6882 /* Use the generic Mach-O PIC machinery. */
6883 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6887 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6889 else if (TARGET_64BIT
6890 && ix86_cmodel != CM_SMALL_PIC
6891 && local_symbolic_operand (addr, Pmode))
6894 /* This symbol may be referenced via a displacement from the PIC
6895 base address (@GOTOFF). */
6897 if (reload_in_progress)
6898 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6899 if (GET_CODE (addr) == CONST)
6900 addr = XEXP (addr, 0);
6901 if (GET_CODE (addr) == PLUS)
6903 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6904 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6907 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6908 new = gen_rtx_CONST (Pmode, new);
6910 tmpreg = gen_reg_rtx (Pmode);
6913 emit_move_insn (tmpreg, new);
6917 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6918 tmpreg, 1, OPTAB_DIRECT);
6921 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6923 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6925 /* This symbol may be referenced via a displacement from the PIC
6926 base address (@GOTOFF). */
6928 if (reload_in_progress)
6929 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6930 if (GET_CODE (addr) == CONST)
6931 addr = XEXP (addr, 0);
6932 if (GET_CODE (addr) == PLUS)
6934 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6935 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6938 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6939 new = gen_rtx_CONST (Pmode, new);
6940 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6944 emit_move_insn (reg, new);
6948 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6952 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6953 new = gen_rtx_CONST (Pmode, new);
6954 new = gen_const_mem (Pmode, new);
6955 set_mem_alias_set (new, ix86_GOT_alias_set ());
6958 reg = gen_reg_rtx (Pmode);
6959 /* Use directly gen_movsi, otherwise the address is loaded
6960 into register for CSE. We don't want to CSE this addresses,
6961 instead we CSE addresses from the GOT table, so skip this. */
6962 emit_insn (gen_movsi (reg, new));
6967 /* This symbol must be referenced via a load from the
6968 Global Offset Table (@GOT). */
6970 if (reload_in_progress)
6971 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6972 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6973 new = gen_rtx_CONST (Pmode, new);
6974 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6975 new = gen_const_mem (Pmode, new);
6976 set_mem_alias_set (new, ix86_GOT_alias_set ());
6979 reg = gen_reg_rtx (Pmode);
6980 emit_move_insn (reg, new);
6986 if (GET_CODE (addr) == CONST_INT
6987 && !x86_64_immediate_operand (addr, VOIDmode))
6991 emit_move_insn (reg, addr);
6995 new = force_reg (Pmode, addr);
6997 else if (GET_CODE (addr) == CONST)
6999 addr = XEXP (addr, 0);
7001 /* We must match stuff we generate before. Assume the only
7002 unspecs that can get here are ours. Not that we could do
7003 anything with them anyway.... */
7004 if (GET_CODE (addr) == UNSPEC
7005 || (GET_CODE (addr) == PLUS
7006 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7008 gcc_assert (GET_CODE (addr) == PLUS);
7010 if (GET_CODE (addr) == PLUS)
7012 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7014 /* Check first to see if this is a constant offset from a @GOTOFF
7015 symbol reference. */
7016 if (local_symbolic_operand (op0, Pmode)
7017 && GET_CODE (op1) == CONST_INT)
7021 if (reload_in_progress)
7022 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7023 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7025 new = gen_rtx_PLUS (Pmode, new, op1);
7026 new = gen_rtx_CONST (Pmode, new);
7027 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7031 emit_move_insn (reg, new);
7037 if (INTVAL (op1) < -16*1024*1024
7038 || INTVAL (op1) >= 16*1024*1024)
7040 if (!x86_64_immediate_operand (op1, Pmode))
7041 op1 = force_reg (Pmode, op1);
7042 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7048 base = legitimize_pic_address (XEXP (addr, 0), reg);
7049 new = legitimize_pic_address (XEXP (addr, 1),
7050 base == reg ? NULL_RTX : reg);
7052 if (GET_CODE (new) == CONST_INT)
7053 new = plus_constant (base, INTVAL (new));
7056 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7058 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7059 new = XEXP (new, 1);
7061 new = gen_rtx_PLUS (Pmode, base, new);
7069 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7072 get_thread_pointer (int to_reg)
7076 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7080 reg = gen_reg_rtx (Pmode);
7081 insn = gen_rtx_SET (VOIDmode, reg, tp);
7082 insn = emit_insn (insn);
7087 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7088 false if we expect this to be used for a memory address and true if
7089 we expect to load the address into a register. */
7092 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7094 rtx dest, base, off, pic, tp;
7099 case TLS_MODEL_GLOBAL_DYNAMIC:
7100 dest = gen_reg_rtx (Pmode);
7101 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7103 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7105 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7108 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7109 insns = get_insns ();
7112 emit_libcall_block (insns, dest, rax, x);
7114 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7115 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7117 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7119 if (TARGET_GNU2_TLS)
7121 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7123 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7127 case TLS_MODEL_LOCAL_DYNAMIC:
7128 base = gen_reg_rtx (Pmode);
7129 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7131 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7133 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7136 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7137 insns = get_insns ();
7140 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7141 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7142 emit_libcall_block (insns, base, rax, note);
7144 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7145 emit_insn (gen_tls_local_dynamic_base_64 (base));
7147 emit_insn (gen_tls_local_dynamic_base_32 (base));
7149 if (TARGET_GNU2_TLS)
7151 rtx x = ix86_tls_module_base ();
7153 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7154 gen_rtx_MINUS (Pmode, x, tp));
7157 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7158 off = gen_rtx_CONST (Pmode, off);
7160 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7162 if (TARGET_GNU2_TLS)
7164 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7166 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7171 case TLS_MODEL_INITIAL_EXEC:
7175 type = UNSPEC_GOTNTPOFF;
7179 if (reload_in_progress)
7180 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7181 pic = pic_offset_table_rtx;
7182 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7184 else if (!TARGET_ANY_GNU_TLS)
7186 pic = gen_reg_rtx (Pmode);
7187 emit_insn (gen_set_got (pic));
7188 type = UNSPEC_GOTTPOFF;
7193 type = UNSPEC_INDNTPOFF;
7196 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7197 off = gen_rtx_CONST (Pmode, off);
7199 off = gen_rtx_PLUS (Pmode, pic, off);
7200 off = gen_const_mem (Pmode, off);
7201 set_mem_alias_set (off, ix86_GOT_alias_set ());
7203 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7205 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7206 off = force_reg (Pmode, off);
7207 return gen_rtx_PLUS (Pmode, base, off);
7211 base = get_thread_pointer (true);
7212 dest = gen_reg_rtx (Pmode);
7213 emit_insn (gen_subsi3 (dest, base, off));
7217 case TLS_MODEL_LOCAL_EXEC:
7218 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7219 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7220 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7221 off = gen_rtx_CONST (Pmode, off);
7223 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7225 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7226 return gen_rtx_PLUS (Pmode, base, off);
7230 base = get_thread_pointer (true);
7231 dest = gen_reg_rtx (Pmode);
7232 emit_insn (gen_subsi3 (dest, base, off));
7243 /* Try machine-dependent ways of modifying an illegitimate address
7244 to be legitimate. If we find one, return the new, valid address.
7245 This macro is used in only one place: `memory_address' in explow.c.
7247 OLDX is the address as it was before break_out_memory_refs was called.
7248 In some cases it is useful to look at this to decide what needs to be done.
7250 MODE and WIN are passed so that this macro can use
7251 GO_IF_LEGITIMATE_ADDRESS.
7253 It is always safe for this macro to do nothing. It exists to recognize
7254 opportunities to optimize the output.
7256 For the 80386, we handle X+REG by loading X into a register R and
7257 using R+REG. R will go in a general reg and indexing will be used.
7258 However, if REG is a broken-out memory address or multiplication,
7259 nothing needs to be done because REG can certainly go in a general reg.
7261 When -fpic is used, special handling is needed for symbolic references.
7262 See comments by legitimize_pic_address in i386.c for details. */
7265 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7270 if (TARGET_DEBUG_ADDR)
7272 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7273 GET_MODE_NAME (mode));
7277 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7279 return legitimize_tls_address (x, log, false);
7280 if (GET_CODE (x) == CONST
7281 && GET_CODE (XEXP (x, 0)) == PLUS
7282 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7283 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7285 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7286 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7289 if (flag_pic && SYMBOLIC_CONST (x))
7290 return legitimize_pic_address (x, 0);
7292 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7293 if (GET_CODE (x) == ASHIFT
7294 && GET_CODE (XEXP (x, 1)) == CONST_INT
7295 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7298 log = INTVAL (XEXP (x, 1));
7299 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7300 GEN_INT (1 << log));
7303 if (GET_CODE (x) == PLUS)
7305 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7307 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7308 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7309 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7312 log = INTVAL (XEXP (XEXP (x, 0), 1));
7313 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7314 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7315 GEN_INT (1 << log));
7318 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7319 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7320 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7323 log = INTVAL (XEXP (XEXP (x, 1), 1));
7324 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7325 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7326 GEN_INT (1 << log));
7329 /* Put multiply first if it isn't already. */
7330 if (GET_CODE (XEXP (x, 1)) == MULT)
7332 rtx tmp = XEXP (x, 0);
7333 XEXP (x, 0) = XEXP (x, 1);
7338 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7339 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7340 created by virtual register instantiation, register elimination, and
7341 similar optimizations. */
7342 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7345 x = gen_rtx_PLUS (Pmode,
7346 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7347 XEXP (XEXP (x, 1), 0)),
7348 XEXP (XEXP (x, 1), 1));
7352 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7353 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7354 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7355 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7356 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7357 && CONSTANT_P (XEXP (x, 1)))
7360 rtx other = NULL_RTX;
7362 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7364 constant = XEXP (x, 1);
7365 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7367 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7369 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7370 other = XEXP (x, 1);
7378 x = gen_rtx_PLUS (Pmode,
7379 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7380 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7381 plus_constant (other, INTVAL (constant)));
7385 if (changed && legitimate_address_p (mode, x, FALSE))
7388 if (GET_CODE (XEXP (x, 0)) == MULT)
7391 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7394 if (GET_CODE (XEXP (x, 1)) == MULT)
7397 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7401 && GET_CODE (XEXP (x, 1)) == REG
7402 && GET_CODE (XEXP (x, 0)) == REG)
7405 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7408 x = legitimize_pic_address (x, 0);
7411 if (changed && legitimate_address_p (mode, x, FALSE))
7414 if (GET_CODE (XEXP (x, 0)) == REG)
7416 rtx temp = gen_reg_rtx (Pmode);
7417 rtx val = force_operand (XEXP (x, 1), temp);
7419 emit_move_insn (temp, val);
7425 else if (GET_CODE (XEXP (x, 1)) == REG)
7427 rtx temp = gen_reg_rtx (Pmode);
7428 rtx val = force_operand (XEXP (x, 0), temp);
7430 emit_move_insn (temp, val);
7440 /* Print an integer constant expression in assembler syntax. Addition
7441 and subtraction are the only arithmetic that may appear in these
7442 expressions. FILE is the stdio stream to write to, X is the rtx, and
7443 CODE is the operand print code from the output string. */
7446 output_pic_addr_const (FILE *file, rtx x, int code)
7450 switch (GET_CODE (x))
7453 gcc_assert (flag_pic);
7458 output_addr_const (file, x);
7459 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7460 fputs ("@PLT", file);
7467 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7468 assemble_name (asm_out_file, buf);
7472 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7476 /* This used to output parentheses around the expression,
7477 but that does not work on the 386 (either ATT or BSD assembler). */
7478 output_pic_addr_const (file, XEXP (x, 0), code);
7482 if (GET_MODE (x) == VOIDmode)
7484 /* We can use %d if the number is <32 bits and positive. */
7485 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7486 fprintf (file, "0x%lx%08lx",
7487 (unsigned long) CONST_DOUBLE_HIGH (x),
7488 (unsigned long) CONST_DOUBLE_LOW (x));
7490 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7493 /* We can't handle floating point constants;
7494 PRINT_OPERAND must handle them. */
7495 output_operand_lossage ("floating constant misused");
7499 /* Some assemblers need integer constants to appear first. */
7500 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7502 output_pic_addr_const (file, XEXP (x, 0), code);
7504 output_pic_addr_const (file, XEXP (x, 1), code);
7508 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7509 output_pic_addr_const (file, XEXP (x, 1), code);
7511 output_pic_addr_const (file, XEXP (x, 0), code);
7517 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7518 output_pic_addr_const (file, XEXP (x, 0), code);
7520 output_pic_addr_const (file, XEXP (x, 1), code);
7522 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7526 gcc_assert (XVECLEN (x, 0) == 1);
7527 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7528 switch (XINT (x, 1))
7531 fputs ("@GOT", file);
7534 fputs ("@GOTOFF", file);
7536 case UNSPEC_GOTPCREL:
7537 fputs ("@GOTPCREL(%rip)", file);
7539 case UNSPEC_GOTTPOFF:
7540 /* FIXME: This might be @TPOFF in Sun ld too. */
7541 fputs ("@GOTTPOFF", file);
7544 fputs ("@TPOFF", file);
7548 fputs ("@TPOFF", file);
7550 fputs ("@NTPOFF", file);
7553 fputs ("@DTPOFF", file);
7555 case UNSPEC_GOTNTPOFF:
7557 fputs ("@GOTTPOFF(%rip)", file);
7559 fputs ("@GOTNTPOFF", file);
7561 case UNSPEC_INDNTPOFF:
7562 fputs ("@INDNTPOFF", file);
7565 output_operand_lossage ("invalid UNSPEC as operand");
7571 output_operand_lossage ("invalid expression as operand");
7575 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7576 We need to emit DTP-relative relocations. */
7579 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7581 fputs (ASM_LONG, file);
7582 output_addr_const (file, x);
7583 fputs ("@DTPOFF", file);
7589 fputs (", 0", file);
7596 /* In the name of slightly smaller debug output, and to cater to
7597 general assembler lossage, recognize PIC+GOTOFF and turn it back
7598 into a direct symbol reference.
7600 On Darwin, this is necessary to avoid a crash, because Darwin
7601 has a different PIC label for each routine but the DWARF debugging
7602 information is not associated with any particular routine, so it's
7603 necessary to remove references to the PIC label from RTL stored by
7604 the DWARF output code. */
7607 ix86_delegitimize_address (rtx orig_x)
7610 /* reg_addend is NULL or a multiple of some register. */
7611 rtx reg_addend = NULL_RTX;
7612 /* const_addend is NULL or a const_int. */
7613 rtx const_addend = NULL_RTX;
7614 /* This is the result, or NULL. */
7615 rtx result = NULL_RTX;
7617 if (GET_CODE (x) == MEM)
7622 if (GET_CODE (x) != CONST
7623 || GET_CODE (XEXP (x, 0)) != UNSPEC
7624 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7625 || GET_CODE (orig_x) != MEM)
7627 return XVECEXP (XEXP (x, 0), 0, 0);
7630 if (GET_CODE (x) != PLUS
7631 || GET_CODE (XEXP (x, 1)) != CONST)
7634 if (GET_CODE (XEXP (x, 0)) == REG
7635 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7636 /* %ebx + GOT/GOTOFF */
7638 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7640 /* %ebx + %reg * scale + GOT/GOTOFF */
7641 reg_addend = XEXP (x, 0);
7642 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7643 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7644 reg_addend = XEXP (reg_addend, 1);
7645 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7646 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7647 reg_addend = XEXP (reg_addend, 0);
7650 if (GET_CODE (reg_addend) != REG
7651 && GET_CODE (reg_addend) != MULT
7652 && GET_CODE (reg_addend) != ASHIFT)
7658 x = XEXP (XEXP (x, 1), 0);
7659 if (GET_CODE (x) == PLUS
7660 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7662 const_addend = XEXP (x, 1);
7666 if (GET_CODE (x) == UNSPEC
7667 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7668 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7669 result = XVECEXP (x, 0, 0);
7671 if (TARGET_MACHO && darwin_local_data_pic (x)
7672 && GET_CODE (orig_x) != MEM)
7673 result = XEXP (x, 0);
7679 result = gen_rtx_PLUS (Pmode, result, const_addend);
7681 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7686 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7691 if (mode == CCFPmode || mode == CCFPUmode)
7693 enum rtx_code second_code, bypass_code;
7694 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7695 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7696 code = ix86_fp_compare_code_to_integer (code);
7700 code = reverse_condition (code);
7711 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7715 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7716 Those same assemblers have the same but opposite lossage on cmov. */
7717 gcc_assert (mode == CCmode);
7718 suffix = fp ? "nbe" : "a";
7738 gcc_assert (mode == CCmode);
7760 gcc_assert (mode == CCmode);
7761 suffix = fp ? "nb" : "ae";
7764 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7768 gcc_assert (mode == CCmode);
7772 suffix = fp ? "u" : "p";
7775 suffix = fp ? "nu" : "np";
7780 fputs (suffix, file);
7783 /* Print the name of register X to FILE based on its machine mode and number.
7784 If CODE is 'w', pretend the mode is HImode.
7785 If CODE is 'b', pretend the mode is QImode.
7786 If CODE is 'k', pretend the mode is SImode.
7787 If CODE is 'q', pretend the mode is DImode.
7788 If CODE is 'h', pretend the reg is the 'high' byte register.
7789 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7792 print_reg (rtx x, int code, FILE *file)
7794 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7795 && REGNO (x) != FRAME_POINTER_REGNUM
7796 && REGNO (x) != FLAGS_REG
7797 && REGNO (x) != FPSR_REG
7798 && REGNO (x) != FPCR_REG);
7800 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7803 if (code == 'w' || MMX_REG_P (x))
7805 else if (code == 'b')
7807 else if (code == 'k')
7809 else if (code == 'q')
7811 else if (code == 'y')
7813 else if (code == 'h')
7816 code = GET_MODE_SIZE (GET_MODE (x));
7818 /* Irritatingly, AMD extended registers use different naming convention
7819 from the normal registers. */
7820 if (REX_INT_REG_P (x))
7822 gcc_assert (TARGET_64BIT);
7826 error ("extended registers have no high halves");
7829 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7832 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7835 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7838 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7841 error ("unsupported operand size for extended register");
7849 if (STACK_TOP_P (x))
7851 fputs ("st(0)", file);
7858 if (! ANY_FP_REG_P (x))
7859 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7864 fputs (hi_reg_name[REGNO (x)], file);
7867 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7869 fputs (qi_reg_name[REGNO (x)], file);
7872 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7874 fputs (qi_high_reg_name[REGNO (x)], file);
7881 /* Locate some local-dynamic symbol still in use by this function
7882 so that we can print its name in some tls_local_dynamic_base
7886 get_some_local_dynamic_name (void)
7890 if (cfun->machine->some_ld_name)
7891 return cfun->machine->some_ld_name;
7893 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7895 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7896 return cfun->machine->some_ld_name;
7902 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7906 if (GET_CODE (x) == SYMBOL_REF
7907 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7909 cfun->machine->some_ld_name = XSTR (x, 0);
7917 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7918 C -- print opcode suffix for set/cmov insn.
7919 c -- like C, but print reversed condition
7920 F,f -- likewise, but for floating-point.
7921 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7923 R -- print the prefix for register names.
7924 z -- print the opcode suffix for the size of the current operand.
7925 * -- print a star (in certain assembler syntax)
7926 A -- print an absolute memory reference.
7927 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7928 s -- print a shift double count, followed by the assemblers argument
7930 b -- print the QImode name of the register for the indicated operand.
7931 %b0 would print %al if operands[0] is reg 0.
7932 w -- likewise, print the HImode name of the register.
7933 k -- likewise, print the SImode name of the register.
7934 q -- likewise, print the DImode name of the register.
7935 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7936 y -- print "st(0)" instead of "st" as a register.
7937 D -- print condition for SSE cmp instruction.
7938 P -- if PIC, print an @PLT suffix.
7939 X -- don't print any sort of PIC '@' suffix for a symbol.
7940 & -- print some in-use local-dynamic symbol name.
7941 H -- print a memory address offset by 8; used for sse high-parts
7945 print_operand (FILE *file, rtx x, int code)
7952 if (ASSEMBLER_DIALECT == ASM_ATT)
7957 assemble_name (file, get_some_local_dynamic_name ());
7961 switch (ASSEMBLER_DIALECT)
7968 /* Intel syntax. For absolute addresses, registers should not
7969 be surrounded by braces. */
7970 if (GET_CODE (x) != REG)
7973 PRINT_OPERAND (file, x, 0);
7983 PRINT_OPERAND (file, x, 0);
7988 if (ASSEMBLER_DIALECT == ASM_ATT)
7993 if (ASSEMBLER_DIALECT == ASM_ATT)
7998 if (ASSEMBLER_DIALECT == ASM_ATT)
8003 if (ASSEMBLER_DIALECT == ASM_ATT)
8008 if (ASSEMBLER_DIALECT == ASM_ATT)
8013 if (ASSEMBLER_DIALECT == ASM_ATT)
8018 /* 387 opcodes don't get size suffixes if the operands are
8020 if (STACK_REG_P (x))
8023 /* Likewise if using Intel opcodes. */
8024 if (ASSEMBLER_DIALECT == ASM_INTEL)
8027 /* This is the size of op from size of operand. */
8028 switch (GET_MODE_SIZE (GET_MODE (x)))
8031 #ifdef HAVE_GAS_FILDS_FISTS
8037 if (GET_MODE (x) == SFmode)
8052 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8054 #ifdef GAS_MNEMONICS
8080 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8082 PRINT_OPERAND (file, x, 0);
8088 /* Little bit of braindamage here. The SSE compare instructions
8089 does use completely different names for the comparisons that the
8090 fp conditional moves. */
8091 switch (GET_CODE (x))
8106 fputs ("unord", file);
8110 fputs ("neq", file);
8114 fputs ("nlt", file);
8118 fputs ("nle", file);
8121 fputs ("ord", file);
8128 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8129 if (ASSEMBLER_DIALECT == ASM_ATT)
8131 switch (GET_MODE (x))
8133 case HImode: putc ('w', file); break;
8135 case SFmode: putc ('l', file); break;
8137 case DFmode: putc ('q', file); break;
8138 default: gcc_unreachable ();
8145 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8148 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8149 if (ASSEMBLER_DIALECT == ASM_ATT)
8152 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8155 /* Like above, but reverse condition */
8157 /* Check to see if argument to %c is really a constant
8158 and not a condition code which needs to be reversed. */
8159 if (!COMPARISON_P (x))
8161 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8164 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8167 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8168 if (ASSEMBLER_DIALECT == ASM_ATT)
8171 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8175 /* It doesn't actually matter what mode we use here, as we're
8176 only going to use this for printing. */
8177 x = adjust_address_nv (x, DImode, 8);
8184 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8187 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8190 int pred_val = INTVAL (XEXP (x, 0));
8192 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8193 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8195 int taken = pred_val > REG_BR_PROB_BASE / 2;
8196 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8198 /* Emit hints only in the case default branch prediction
8199 heuristics would fail. */
8200 if (taken != cputaken)
8202 /* We use 3e (DS) prefix for taken branches and
8203 2e (CS) prefix for not taken branches. */
8205 fputs ("ds ; ", file);
8207 fputs ("cs ; ", file);
8214 output_operand_lossage ("invalid operand code '%c'", code);
8218 if (GET_CODE (x) == REG)
8219 print_reg (x, code, file);
8221 else if (GET_CODE (x) == MEM)
8223 /* No `byte ptr' prefix for call instructions. */
8224 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8227 switch (GET_MODE_SIZE (GET_MODE (x)))
8229 case 1: size = "BYTE"; break;
8230 case 2: size = "WORD"; break;
8231 case 4: size = "DWORD"; break;
8232 case 8: size = "QWORD"; break;
8233 case 12: size = "XWORD"; break;
8234 case 16: size = "XMMWORD"; break;
8239 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8242 else if (code == 'w')
8244 else if (code == 'k')
8248 fputs (" PTR ", file);
8252 /* Avoid (%rip) for call operands. */
8253 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8254 && GET_CODE (x) != CONST_INT)
8255 output_addr_const (file, x);
8256 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8257 output_operand_lossage ("invalid constraints for operand");
8262 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8267 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8268 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8270 if (ASSEMBLER_DIALECT == ASM_ATT)
8272 fprintf (file, "0x%08lx", l);
8275 /* These float cases don't actually occur as immediate operands. */
8276 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8280 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8281 fprintf (file, "%s", dstr);
8284 else if (GET_CODE (x) == CONST_DOUBLE
8285 && GET_MODE (x) == XFmode)
8289 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8290 fprintf (file, "%s", dstr);
8295 /* We have patterns that allow zero sets of memory, for instance.
8296 In 64-bit mode, we should probably support all 8-byte vectors,
8297 since we can in fact encode that into an immediate. */
8298 if (GET_CODE (x) == CONST_VECTOR)
8300 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8306 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8308 if (ASSEMBLER_DIALECT == ASM_ATT)
8311 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8312 || GET_CODE (x) == LABEL_REF)
8314 if (ASSEMBLER_DIALECT == ASM_ATT)
8317 fputs ("OFFSET FLAT:", file);
8320 if (GET_CODE (x) == CONST_INT)
8321 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8323 output_pic_addr_const (file, x, code);
8325 output_addr_const (file, x);
8329 /* Print a memory operand whose address is ADDR. */
8332 print_operand_address (FILE *file, rtx addr)
8334 struct ix86_address parts;
8335 rtx base, index, disp;
8337 int ok = ix86_decompose_address (addr, &parts);
8342 index = parts.index;
8344 scale = parts.scale;
8352 if (USER_LABEL_PREFIX[0] == 0)
8354 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8360 if (!base && !index)
8362 /* Displacement only requires special attention. */
8364 if (GET_CODE (disp) == CONST_INT)
8366 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8368 if (USER_LABEL_PREFIX[0] == 0)
8370 fputs ("ds:", file);
8372 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8375 output_pic_addr_const (file, disp, 0);
8377 output_addr_const (file, disp);
8379 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8382 if (GET_CODE (disp) == CONST
8383 && GET_CODE (XEXP (disp, 0)) == PLUS
8384 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8385 disp = XEXP (XEXP (disp, 0), 0);
8386 if (GET_CODE (disp) == LABEL_REF
8387 || (GET_CODE (disp) == SYMBOL_REF
8388 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8389 fputs ("(%rip)", file);
8394 if (ASSEMBLER_DIALECT == ASM_ATT)
8399 output_pic_addr_const (file, disp, 0);
8400 else if (GET_CODE (disp) == LABEL_REF)
8401 output_asm_label (disp);
8403 output_addr_const (file, disp);
8408 print_reg (base, 0, file);
8412 print_reg (index, 0, file);
8414 fprintf (file, ",%d", scale);
8420 rtx offset = NULL_RTX;
8424 /* Pull out the offset of a symbol; print any symbol itself. */
8425 if (GET_CODE (disp) == CONST
8426 && GET_CODE (XEXP (disp, 0)) == PLUS
8427 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8429 offset = XEXP (XEXP (disp, 0), 1);
8430 disp = gen_rtx_CONST (VOIDmode,
8431 XEXP (XEXP (disp, 0), 0));
8435 output_pic_addr_const (file, disp, 0);
8436 else if (GET_CODE (disp) == LABEL_REF)
8437 output_asm_label (disp);
8438 else if (GET_CODE (disp) == CONST_INT)
8441 output_addr_const (file, disp);
8447 print_reg (base, 0, file);
8450 if (INTVAL (offset) >= 0)
8452 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8456 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8463 print_reg (index, 0, file);
8465 fprintf (file, "*%d", scale);
8473 output_addr_const_extra (FILE *file, rtx x)
8477 if (GET_CODE (x) != UNSPEC)
8480 op = XVECEXP (x, 0, 0);
8481 switch (XINT (x, 1))
8483 case UNSPEC_GOTTPOFF:
8484 output_addr_const (file, op);
8485 /* FIXME: This might be @TPOFF in Sun ld. */
8486 fputs ("@GOTTPOFF", file);
8489 output_addr_const (file, op);
8490 fputs ("@TPOFF", file);
8493 output_addr_const (file, op);
8495 fputs ("@TPOFF", file);
8497 fputs ("@NTPOFF", file);
8500 output_addr_const (file, op);
8501 fputs ("@DTPOFF", file);
8503 case UNSPEC_GOTNTPOFF:
8504 output_addr_const (file, op);
8506 fputs ("@GOTTPOFF(%rip)", file);
8508 fputs ("@GOTNTPOFF", file);
8510 case UNSPEC_INDNTPOFF:
8511 output_addr_const (file, op);
8512 fputs ("@INDNTPOFF", file);
8522 /* Split one or more DImode RTL references into pairs of SImode
8523 references. The RTL can be REG, offsettable MEM, integer constant, or
8524 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8525 split and "num" is its length. lo_half and hi_half are output arrays
8526 that parallel "operands". */
8529 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8533 rtx op = operands[num];
8535 /* simplify_subreg refuse to split volatile memory addresses,
8536 but we still have to handle it. */
8537 if (GET_CODE (op) == MEM)
8539 lo_half[num] = adjust_address (op, SImode, 0);
8540 hi_half[num] = adjust_address (op, SImode, 4);
8544 lo_half[num] = simplify_gen_subreg (SImode, op,
8545 GET_MODE (op) == VOIDmode
8546 ? DImode : GET_MODE (op), 0);
8547 hi_half[num] = simplify_gen_subreg (SImode, op,
8548 GET_MODE (op) == VOIDmode
8549 ? DImode : GET_MODE (op), 4);
8553 /* Split one or more TImode RTL references into pairs of DImode
8554 references. The RTL can be REG, offsettable MEM, integer constant, or
8555 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8556 split and "num" is its length. lo_half and hi_half are output arrays
8557 that parallel "operands". */
8560 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8564 rtx op = operands[num];
8566 /* simplify_subreg refuse to split volatile memory addresses, but we
8567 still have to handle it. */
8568 if (GET_CODE (op) == MEM)
8570 lo_half[num] = adjust_address (op, DImode, 0);
8571 hi_half[num] = adjust_address (op, DImode, 8);
8575 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8576 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8581 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8582 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8583 is the expression of the binary operation. The output may either be
8584 emitted here, or returned to the caller, like all output_* functions.
8586 There is no guarantee that the operands are the same mode, as they
8587 might be within FLOAT or FLOAT_EXTEND expressions. */
8589 #ifndef SYSV386_COMPAT
8590 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8591 wants to fix the assemblers because that causes incompatibility
8592 with gcc. No-one wants to fix gcc because that causes
8593 incompatibility with assemblers... You can use the option of
8594 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8595 #define SYSV386_COMPAT 1
8599 output_387_binary_op (rtx insn, rtx *operands)
8601 static char buf[30];
8604 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8606 #ifdef ENABLE_CHECKING
8607 /* Even if we do not want to check the inputs, this documents input
8608 constraints. Which helps in understanding the following code. */
8609 if (STACK_REG_P (operands[0])
8610 && ((REG_P (operands[1])
8611 && REGNO (operands[0]) == REGNO (operands[1])
8612 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8613 || (REG_P (operands[2])
8614 && REGNO (operands[0]) == REGNO (operands[2])
8615 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8616 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8619 gcc_assert (is_sse);
8622 switch (GET_CODE (operands[3]))
8625 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8626 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8634 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8635 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8643 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8644 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8652 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8653 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8667 if (GET_MODE (operands[0]) == SFmode)
8668 strcat (buf, "ss\t{%2, %0|%0, %2}");
8670 strcat (buf, "sd\t{%2, %0|%0, %2}");
8675 switch (GET_CODE (operands[3]))
8679 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8681 rtx temp = operands[2];
8682 operands[2] = operands[1];
8686 /* know operands[0] == operands[1]. */
8688 if (GET_CODE (operands[2]) == MEM)
8694 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8696 if (STACK_TOP_P (operands[0]))
8697 /* How is it that we are storing to a dead operand[2]?
8698 Well, presumably operands[1] is dead too. We can't
8699 store the result to st(0) as st(0) gets popped on this
8700 instruction. Instead store to operands[2] (which I
8701 think has to be st(1)). st(1) will be popped later.
8702 gcc <= 2.8.1 didn't have this check and generated
8703 assembly code that the Unixware assembler rejected. */
8704 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8706 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8710 if (STACK_TOP_P (operands[0]))
8711 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8713 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8718 if (GET_CODE (operands[1]) == MEM)
8724 if (GET_CODE (operands[2]) == MEM)
8730 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8733 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8734 derived assemblers, confusingly reverse the direction of
8735 the operation for fsub{r} and fdiv{r} when the
8736 destination register is not st(0). The Intel assembler
8737 doesn't have this brain damage. Read !SYSV386_COMPAT to
8738 figure out what the hardware really does. */
8739 if (STACK_TOP_P (operands[0]))
8740 p = "{p\t%0, %2|rp\t%2, %0}";
8742 p = "{rp\t%2, %0|p\t%0, %2}";
8744 if (STACK_TOP_P (operands[0]))
8745 /* As above for fmul/fadd, we can't store to st(0). */
8746 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8748 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8753 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8756 if (STACK_TOP_P (operands[0]))
8757 p = "{rp\t%0, %1|p\t%1, %0}";
8759 p = "{p\t%1, %0|rp\t%0, %1}";
8761 if (STACK_TOP_P (operands[0]))
8762 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8764 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8769 if (STACK_TOP_P (operands[0]))
8771 if (STACK_TOP_P (operands[1]))
8772 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8774 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8777 else if (STACK_TOP_P (operands[1]))
8780 p = "{\t%1, %0|r\t%0, %1}";
8782 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8788 p = "{r\t%2, %0|\t%0, %2}";
8790 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8803 /* Return needed mode for entity in optimize_mode_switching pass. */
8806 ix86_mode_needed (int entity, rtx insn)
8808 enum attr_i387_cw mode;
8810 /* The mode UNINITIALIZED is used to store control word after a
8811 function call or ASM pattern. The mode ANY specify that function
8812 has no requirements on the control word and make no changes in the
8813 bits we are interested in. */
8816 || (NONJUMP_INSN_P (insn)
8817 && (asm_noperands (PATTERN (insn)) >= 0
8818 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8819 return I387_CW_UNINITIALIZED;
8821 if (recog_memoized (insn) < 0)
8824 mode = get_attr_i387_cw (insn);
8829 if (mode == I387_CW_TRUNC)
8834 if (mode == I387_CW_FLOOR)
8839 if (mode == I387_CW_CEIL)
8844 if (mode == I387_CW_MASK_PM)
8855 /* Output code to initialize control word copies used by trunc?f?i and
8856 rounding patterns. CURRENT_MODE is set to current control word,
8857 while NEW_MODE is set to new control word. */
8860 emit_i387_cw_initialization (int mode)
8862 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8867 rtx reg = gen_reg_rtx (HImode);
8869 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8870 emit_move_insn (reg, copy_rtx (stored_mode));
8872 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8877 /* round toward zero (truncate) */
8878 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8879 slot = SLOT_CW_TRUNC;
8883 /* round down toward -oo */
8884 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8885 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8886 slot = SLOT_CW_FLOOR;
8890 /* round up toward +oo */
8891 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8892 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8893 slot = SLOT_CW_CEIL;
8896 case I387_CW_MASK_PM:
8897 /* mask precision exception for nearbyint() */
8898 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8899 slot = SLOT_CW_MASK_PM;
8911 /* round toward zero (truncate) */
8912 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8913 slot = SLOT_CW_TRUNC;
8917 /* round down toward -oo */
8918 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8919 slot = SLOT_CW_FLOOR;
8923 /* round up toward +oo */
8924 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8925 slot = SLOT_CW_CEIL;
8928 case I387_CW_MASK_PM:
8929 /* mask precision exception for nearbyint() */
8930 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8931 slot = SLOT_CW_MASK_PM;
8939 gcc_assert (slot < MAX_386_STACK_LOCALS);
8941 new_mode = assign_386_stack_local (HImode, slot);
8942 emit_move_insn (new_mode, reg);
8945 /* Output code for INSN to convert a float to a signed int. OPERANDS
8946 are the insn operands. The output may be [HSD]Imode and the input
8947 operand may be [SDX]Fmode. */
8950 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8952 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8953 int dimode_p = GET_MODE (operands[0]) == DImode;
8954 int round_mode = get_attr_i387_cw (insn);
8956 /* Jump through a hoop or two for DImode, since the hardware has no
8957 non-popping instruction. We used to do this a different way, but
8958 that was somewhat fragile and broke with post-reload splitters. */
8959 if ((dimode_p || fisttp) && !stack_top_dies)
8960 output_asm_insn ("fld\t%y1", operands);
8962 gcc_assert (STACK_TOP_P (operands[1]));
8963 gcc_assert (GET_CODE (operands[0]) == MEM);
8966 output_asm_insn ("fisttp%z0\t%0", operands);
8969 if (round_mode != I387_CW_ANY)
8970 output_asm_insn ("fldcw\t%3", operands);
8971 if (stack_top_dies || dimode_p)
8972 output_asm_insn ("fistp%z0\t%0", operands);
8974 output_asm_insn ("fist%z0\t%0", operands);
8975 if (round_mode != I387_CW_ANY)
8976 output_asm_insn ("fldcw\t%2", operands);
8982 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8983 have the values zero or one, indicates the ffreep insn's operand
8984 from the OPERANDS array. */
8987 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8989 if (TARGET_USE_FFREEP)
8990 #if HAVE_AS_IX86_FFREEP
8991 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8994 static char retval[] = ".word\t0xc_df";
8995 int regno = REGNO (operands[opno]);
8997 gcc_assert (FP_REGNO_P (regno));
8999 retval[9] = '0' + (regno - FIRST_STACK_REG);
9004 return opno ? "fstp\t%y1" : "fstp\t%y0";
9008 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9009 should be used. UNORDERED_P is true when fucom should be used. */
9012 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9015 rtx cmp_op0, cmp_op1;
9016 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9020 cmp_op0 = operands[0];
9021 cmp_op1 = operands[1];
9025 cmp_op0 = operands[1];
9026 cmp_op1 = operands[2];
9031 if (GET_MODE (operands[0]) == SFmode)
9033 return "ucomiss\t{%1, %0|%0, %1}";
9035 return "comiss\t{%1, %0|%0, %1}";
9038 return "ucomisd\t{%1, %0|%0, %1}";
9040 return "comisd\t{%1, %0|%0, %1}";
9043 gcc_assert (STACK_TOP_P (cmp_op0));
9045 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9047 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9051 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9052 return output_387_ffreep (operands, 1);
9055 return "ftst\n\tfnstsw\t%0";
9058 if (STACK_REG_P (cmp_op1)
9060 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9061 && REGNO (cmp_op1) != FIRST_STACK_REG)
9063 /* If both the top of the 387 stack dies, and the other operand
9064 is also a stack register that dies, then this must be a
9065 `fcompp' float compare */
9069 /* There is no double popping fcomi variant. Fortunately,
9070 eflags is immune from the fstp's cc clobbering. */
9072 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9074 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9075 return output_387_ffreep (operands, 0);
9080 return "fucompp\n\tfnstsw\t%0";
9082 return "fcompp\n\tfnstsw\t%0";
9087 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9089 static const char * const alt[16] =
9091 "fcom%z2\t%y2\n\tfnstsw\t%0",
9092 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9093 "fucom%z2\t%y2\n\tfnstsw\t%0",
9094 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9096 "ficom%z2\t%y2\n\tfnstsw\t%0",
9097 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9101 "fcomi\t{%y1, %0|%0, %y1}",
9102 "fcomip\t{%y1, %0|%0, %y1}",
9103 "fucomi\t{%y1, %0|%0, %y1}",
9104 "fucomip\t{%y1, %0|%0, %y1}",
9115 mask = eflags_p << 3;
9116 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9117 mask |= unordered_p << 1;
9118 mask |= stack_top_dies;
9120 gcc_assert (mask < 16);
9129 ix86_output_addr_vec_elt (FILE *file, int value)
9131 const char *directive = ASM_LONG;
9135 directive = ASM_QUAD;
9137 gcc_assert (!TARGET_64BIT);
9140 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9144 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9147 fprintf (file, "%s%s%d-%s%d\n",
9148 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9149 else if (HAVE_AS_GOTOFF_IN_DATA)
9150 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9152 else if (TARGET_MACHO)
9154 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9155 machopic_output_function_base_name (file);
9156 fprintf(file, "\n");
9160 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9161 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9164 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9168 ix86_expand_clear (rtx dest)
9172 /* We play register width games, which are only valid after reload. */
9173 gcc_assert (reload_completed);
9175 /* Avoid HImode and its attendant prefix byte. */
9176 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9177 dest = gen_rtx_REG (SImode, REGNO (dest));
9179 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9181 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9182 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9184 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9185 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9191 /* X is an unchanging MEM. If it is a constant pool reference, return
9192 the constant pool rtx, else NULL. */
9195 maybe_get_pool_constant (rtx x)
9197 x = ix86_delegitimize_address (XEXP (x, 0));
9199 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9200 return get_pool_constant (x);
9206 ix86_expand_move (enum machine_mode mode, rtx operands[])
9208 int strict = (reload_in_progress || reload_completed);
9210 enum tls_model model;
9215 if (GET_CODE (op1) == SYMBOL_REF)
9217 model = SYMBOL_REF_TLS_MODEL (op1);
9220 op1 = legitimize_tls_address (op1, model, true);
9221 op1 = force_operand (op1, op0);
9226 else if (GET_CODE (op1) == CONST
9227 && GET_CODE (XEXP (op1, 0)) == PLUS
9228 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9230 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9233 rtx addend = XEXP (XEXP (op1, 0), 1);
9234 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9235 op1 = force_operand (op1, NULL);
9236 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9237 op0, 1, OPTAB_DIRECT);
9243 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9245 if (TARGET_MACHO && !TARGET_64BIT)
9250 rtx temp = ((reload_in_progress
9251 || ((op0 && GET_CODE (op0) == REG)
9253 ? op0 : gen_reg_rtx (Pmode));
9254 op1 = machopic_indirect_data_reference (op1, temp);
9255 op1 = machopic_legitimize_pic_address (op1, mode,
9256 temp == op1 ? 0 : temp);
9258 else if (MACHOPIC_INDIRECT)
9259 op1 = machopic_indirect_data_reference (op1, 0);
9266 if (GET_CODE (op0) == MEM)
9267 op1 = force_reg (Pmode, op1);
9269 op1 = legitimize_address (op1, op1, Pmode);
9274 if (GET_CODE (op0) == MEM
9275 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9276 || !push_operand (op0, mode))
9277 && GET_CODE (op1) == MEM)
9278 op1 = force_reg (mode, op1);
9280 if (push_operand (op0, mode)
9281 && ! general_no_elim_operand (op1, mode))
9282 op1 = copy_to_mode_reg (mode, op1);
9284 /* Force large constants in 64bit compilation into register
9285 to get them CSEed. */
9286 if (TARGET_64BIT && mode == DImode
9287 && immediate_operand (op1, mode)
9288 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9289 && !register_operand (op0, mode)
9290 && optimize && !reload_completed && !reload_in_progress)
9291 op1 = copy_to_mode_reg (mode, op1);
9293 if (FLOAT_MODE_P (mode))
9295 /* If we are loading a floating point constant to a register,
9296 force the value to memory now, since we'll get better code
9297 out the back end. */
9301 else if (GET_CODE (op1) == CONST_DOUBLE)
9303 op1 = validize_mem (force_const_mem (mode, op1));
9304 if (!register_operand (op0, mode))
9306 rtx temp = gen_reg_rtx (mode);
9307 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9308 emit_move_insn (op0, temp);
9315 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9319 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9321 rtx op0 = operands[0], op1 = operands[1];
9323 /* Force constants other than zero into memory. We do not know how
9324 the instructions used to build constants modify the upper 64 bits
9325 of the register, once we have that information we may be able
9326 to handle some of them more efficiently. */
9327 if ((reload_in_progress | reload_completed) == 0
9328 && register_operand (op0, mode)
9330 && standard_sse_constant_p (op1) <= 0)
9331 op1 = validize_mem (force_const_mem (mode, op1));
9333 /* Make operand1 a register if it isn't already. */
9335 && !register_operand (op0, mode)
9336 && !register_operand (op1, mode))
9338 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9342 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9345 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9346 straight to ix86_expand_vector_move. */
9349 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9358 /* If we're optimizing for size, movups is the smallest. */
9361 op0 = gen_lowpart (V4SFmode, op0);
9362 op1 = gen_lowpart (V4SFmode, op1);
9363 emit_insn (gen_sse_movups (op0, op1));
9367 /* ??? If we have typed data, then it would appear that using
9368 movdqu is the only way to get unaligned data loaded with
9370 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9372 op0 = gen_lowpart (V16QImode, op0);
9373 op1 = gen_lowpart (V16QImode, op1);
9374 emit_insn (gen_sse2_movdqu (op0, op1));
9378 if (TARGET_SSE2 && mode == V2DFmode)
9382 /* When SSE registers are split into halves, we can avoid
9383 writing to the top half twice. */
9384 if (TARGET_SSE_SPLIT_REGS)
9386 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9391 /* ??? Not sure about the best option for the Intel chips.
9392 The following would seem to satisfy; the register is
9393 entirely cleared, breaking the dependency chain. We
9394 then store to the upper half, with a dependency depth
9395 of one. A rumor has it that Intel recommends two movsd
9396 followed by an unpacklpd, but this is unconfirmed. And
9397 given that the dependency depth of the unpacklpd would
9398 still be one, I'm not sure why this would be better. */
9399 zero = CONST0_RTX (V2DFmode);
9402 m = adjust_address (op1, DFmode, 0);
9403 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9404 m = adjust_address (op1, DFmode, 8);
9405 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9409 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9410 emit_move_insn (op0, CONST0_RTX (mode));
9412 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9414 if (mode != V4SFmode)
9415 op0 = gen_lowpart (V4SFmode, op0);
9416 m = adjust_address (op1, V2SFmode, 0);
9417 emit_insn (gen_sse_loadlps (op0, op0, m));
9418 m = adjust_address (op1, V2SFmode, 8);
9419 emit_insn (gen_sse_loadhps (op0, op0, m));
9422 else if (MEM_P (op0))
9424 /* If we're optimizing for size, movups is the smallest. */
9427 op0 = gen_lowpart (V4SFmode, op0);
9428 op1 = gen_lowpart (V4SFmode, op1);
9429 emit_insn (gen_sse_movups (op0, op1));
9433 /* ??? Similar to above, only less clear because of quote
9434 typeless stores unquote. */
9435 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9436 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9438 op0 = gen_lowpart (V16QImode, op0);
9439 op1 = gen_lowpart (V16QImode, op1);
9440 emit_insn (gen_sse2_movdqu (op0, op1));
9444 if (TARGET_SSE2 && mode == V2DFmode)
9446 m = adjust_address (op0, DFmode, 0);
9447 emit_insn (gen_sse2_storelpd (m, op1));
9448 m = adjust_address (op0, DFmode, 8);
9449 emit_insn (gen_sse2_storehpd (m, op1));
9453 if (mode != V4SFmode)
9454 op1 = gen_lowpart (V4SFmode, op1);
9455 m = adjust_address (op0, V2SFmode, 0);
9456 emit_insn (gen_sse_storelps (m, op1));
9457 m = adjust_address (op0, V2SFmode, 8);
9458 emit_insn (gen_sse_storehps (m, op1));
9465 /* Expand a push in MODE. This is some mode for which we do not support
9466 proper push instructions, at least from the registers that we expect
9467 the value to live in. */
9470 ix86_expand_push (enum machine_mode mode, rtx x)
9474 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9475 GEN_INT (-GET_MODE_SIZE (mode)),
9476 stack_pointer_rtx, 1, OPTAB_DIRECT);
9477 if (tmp != stack_pointer_rtx)
9478 emit_move_insn (stack_pointer_rtx, tmp);
9480 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9481 emit_move_insn (tmp, x);
9484 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9485 destination to use for the operation. If different from the true
9486 destination in operands[0], a copy operation will be required. */
9489 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9492 int matching_memory;
9493 rtx src1, src2, dst;
9499 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9500 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9501 && (rtx_equal_p (dst, src2)
9502 || immediate_operand (src1, mode)))
9509 /* If the destination is memory, and we do not have matching source
9510 operands, do things in registers. */
9511 matching_memory = 0;
9512 if (GET_CODE (dst) == MEM)
9514 if (rtx_equal_p (dst, src1))
9515 matching_memory = 1;
9516 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9517 && rtx_equal_p (dst, src2))
9518 matching_memory = 2;
9520 dst = gen_reg_rtx (mode);
9523 /* Both source operands cannot be in memory. */
9524 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9526 if (matching_memory != 2)
9527 src2 = force_reg (mode, src2);
9529 src1 = force_reg (mode, src1);
9532 /* If the operation is not commutable, source 1 cannot be a constant
9533 or non-matching memory. */
9534 if ((CONSTANT_P (src1)
9535 || (!matching_memory && GET_CODE (src1) == MEM))
9536 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9537 src1 = force_reg (mode, src1);
9539 src1 = operands[1] = src1;
9540 src2 = operands[2] = src2;
9544 /* Similarly, but assume that the destination has already been
9548 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9549 enum machine_mode mode, rtx operands[])
9551 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9552 gcc_assert (dst == operands[0]);
9555 /* Attempt to expand a binary operator. Make the expansion closer to the
9556 actual machine, then just general_operand, which will allow 3 separate
9557 memory references (one output, two input) in a single insn. */
9560 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9563 rtx src1, src2, dst, op, clob;
9565 dst = ix86_fixup_binary_operands (code, mode, operands);
9569 /* Emit the instruction. */
9571 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9572 if (reload_in_progress)
9574 /* Reload doesn't know about the flags register, and doesn't know that
9575 it doesn't want to clobber it. We can only do this with PLUS. */
9576 gcc_assert (code == PLUS);
9581 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9582 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9585 /* Fix up the destination if needed. */
9586 if (dst != operands[0])
9587 emit_move_insn (operands[0], dst);
9590 /* Return TRUE or FALSE depending on whether the binary operator meets the
9591 appropriate constraints. */
9594 ix86_binary_operator_ok (enum rtx_code code,
9595 enum machine_mode mode ATTRIBUTE_UNUSED,
9598 /* Both source operands cannot be in memory. */
9599 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9601 /* If the operation is not commutable, source 1 cannot be a constant. */
9602 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9604 /* If the destination is memory, we must have a matching source operand. */
9605 if (GET_CODE (operands[0]) == MEM
9606 && ! (rtx_equal_p (operands[0], operands[1])
9607 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9608 && rtx_equal_p (operands[0], operands[2]))))
9610 /* If the operation is not commutable and the source 1 is memory, we must
9611 have a matching destination. */
9612 if (GET_CODE (operands[1]) == MEM
9613 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9614 && ! rtx_equal_p (operands[0], operands[1]))
9619 /* Attempt to expand a unary operator. Make the expansion closer to the
9620 actual machine, then just general_operand, which will allow 2 separate
9621 memory references (one output, one input) in a single insn. */
9624 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9627 int matching_memory;
9628 rtx src, dst, op, clob;
9633 /* If the destination is memory, and we do not have matching source
9634 operands, do things in registers. */
9635 matching_memory = 0;
9638 if (rtx_equal_p (dst, src))
9639 matching_memory = 1;
9641 dst = gen_reg_rtx (mode);
9644 /* When source operand is memory, destination must match. */
9645 if (MEM_P (src) && !matching_memory)
9646 src = force_reg (mode, src);
9648 /* Emit the instruction. */
9650 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9651 if (reload_in_progress || code == NOT)
9653 /* Reload doesn't know about the flags register, and doesn't know that
9654 it doesn't want to clobber it. */
9655 gcc_assert (code == NOT);
9660 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9661 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9664 /* Fix up the destination if needed. */
9665 if (dst != operands[0])
9666 emit_move_insn (operands[0], dst);
9669 /* Return TRUE or FALSE depending on whether the unary operator meets the
9670 appropriate constraints. */
9673 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9674 enum machine_mode mode ATTRIBUTE_UNUSED,
9675 rtx operands[2] ATTRIBUTE_UNUSED)
9677 /* If one of operands is memory, source and destination must match. */
9678 if ((GET_CODE (operands[0]) == MEM
9679 || GET_CODE (operands[1]) == MEM)
9680 && ! rtx_equal_p (operands[0], operands[1]))
9685 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9686 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9687 true, then replicate the mask for all elements of the vector register.
9688 If INVERT is true, then create a mask excluding the sign bit. */
9691 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9693 enum machine_mode vec_mode;
9694 HOST_WIDE_INT hi, lo;
9699 /* Find the sign bit, sign extended to 2*HWI. */
9701 lo = 0x80000000, hi = lo < 0;
9702 else if (HOST_BITS_PER_WIDE_INT >= 64)
9703 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9705 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9710 /* Force this value into the low part of a fp vector constant. */
9711 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9712 mask = gen_lowpart (mode, mask);
9717 v = gen_rtvec (4, mask, mask, mask, mask);
9719 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9720 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9721 vec_mode = V4SFmode;
9726 v = gen_rtvec (2, mask, mask);
9728 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9729 vec_mode = V2DFmode;
9732 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9735 /* Generate code for floating point ABS or NEG. */
9738 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9741 rtx mask, set, use, clob, dst, src;
9742 bool matching_memory;
9743 bool use_sse = false;
9744 bool vector_mode = VECTOR_MODE_P (mode);
9745 enum machine_mode elt_mode = mode;
9749 elt_mode = GET_MODE_INNER (mode);
9752 else if (TARGET_SSE_MATH)
9753 use_sse = SSE_FLOAT_MODE_P (mode);
9755 /* NEG and ABS performed with SSE use bitwise mask operations.
9756 Create the appropriate mask now. */
9758 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9765 /* If the destination is memory, and we don't have matching source
9766 operands or we're using the x87, do things in registers. */
9767 matching_memory = false;
9770 if (use_sse && rtx_equal_p (dst, src))
9771 matching_memory = true;
9773 dst = gen_reg_rtx (mode);
9775 if (MEM_P (src) && !matching_memory)
9776 src = force_reg (mode, src);
9780 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9781 set = gen_rtx_SET (VOIDmode, dst, set);
9786 set = gen_rtx_fmt_e (code, mode, src);
9787 set = gen_rtx_SET (VOIDmode, dst, set);
9790 use = gen_rtx_USE (VOIDmode, mask);
9791 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9792 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9793 gen_rtvec (3, set, use, clob)));
9799 if (dst != operands[0])
9800 emit_move_insn (operands[0], dst);
9803 /* Expand a copysign operation. Special case operand 0 being a constant. */
9806 ix86_expand_copysign (rtx operands[])
9808 enum machine_mode mode, vmode;
9809 rtx dest, op0, op1, mask, nmask;
9815 mode = GET_MODE (dest);
9816 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9818 if (GET_CODE (op0) == CONST_DOUBLE)
9822 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9823 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9825 if (op0 == CONST0_RTX (mode))
9826 op0 = CONST0_RTX (vmode);
9830 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9831 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9833 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9834 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9837 mask = ix86_build_signbit_mask (mode, 0, 0);
9840 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9842 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9846 nmask = ix86_build_signbit_mask (mode, 0, 1);
9847 mask = ix86_build_signbit_mask (mode, 0, 0);
9850 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9852 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9856 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9857 be a constant, and so has already been expanded into a vector constant. */
9860 ix86_split_copysign_const (rtx operands[])
9862 enum machine_mode mode, vmode;
9863 rtx dest, op0, op1, mask, x;
9870 mode = GET_MODE (dest);
9871 vmode = GET_MODE (mask);
9873 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9874 x = gen_rtx_AND (vmode, dest, mask);
9875 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9877 if (op0 != CONST0_RTX (vmode))
9879 x = gen_rtx_IOR (vmode, dest, op0);
9880 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9884 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9885 so we have to do two masks. */
9888 ix86_split_copysign_var (rtx operands[])
9890 enum machine_mode mode, vmode;
9891 rtx dest, scratch, op0, op1, mask, nmask, x;
9894 scratch = operands[1];
9897 nmask = operands[4];
9900 mode = GET_MODE (dest);
9901 vmode = GET_MODE (mask);
9903 if (rtx_equal_p (op0, op1))
9905 /* Shouldn't happen often (it's useless, obviously), but when it does
9906 we'd generate incorrect code if we continue below. */
9907 emit_move_insn (dest, op0);
9911 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9913 gcc_assert (REGNO (op1) == REGNO (scratch));
9915 x = gen_rtx_AND (vmode, scratch, mask);
9916 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9919 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9920 x = gen_rtx_NOT (vmode, dest);
9921 x = gen_rtx_AND (vmode, x, op0);
9922 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9926 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9928 x = gen_rtx_AND (vmode, scratch, mask);
9930 else /* alternative 2,4 */
9932 gcc_assert (REGNO (mask) == REGNO (scratch));
9933 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9934 x = gen_rtx_AND (vmode, scratch, op1);
9936 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9938 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9940 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9941 x = gen_rtx_AND (vmode, dest, nmask);
9943 else /* alternative 3,4 */
9945 gcc_assert (REGNO (nmask) == REGNO (dest));
9947 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9948 x = gen_rtx_AND (vmode, dest, op0);
9950 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9953 x = gen_rtx_IOR (vmode, dest, scratch);
9954 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9957 /* Return TRUE or FALSE depending on whether the first SET in INSN
9958 has source and destination with matching CC modes, and that the
9959 CC mode is at least as constrained as REQ_MODE. */
9962 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9965 enum machine_mode set_mode;
9967 set = PATTERN (insn);
9968 if (GET_CODE (set) == PARALLEL)
9969 set = XVECEXP (set, 0, 0);
9970 gcc_assert (GET_CODE (set) == SET);
9971 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9973 set_mode = GET_MODE (SET_DEST (set));
9977 if (req_mode != CCNOmode
9978 && (req_mode != CCmode
9979 || XEXP (SET_SRC (set), 1) != const0_rtx))
9983 if (req_mode == CCGCmode)
9987 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9991 if (req_mode == CCZmode)
10001 return (GET_MODE (SET_SRC (set)) == set_mode);
10004 /* Generate insn patterns to do an integer compare of OPERANDS. */
10007 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10009 enum machine_mode cmpmode;
10012 cmpmode = SELECT_CC_MODE (code, op0, op1);
10013 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10015 /* This is very simple, but making the interface the same as in the
10016 FP case makes the rest of the code easier. */
10017 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10018 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10020 /* Return the test that should be put into the flags user, i.e.
10021 the bcc, scc, or cmov instruction. */
10022 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10025 /* Figure out whether to use ordered or unordered fp comparisons.
10026 Return the appropriate mode to use. */
10029 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10031 /* ??? In order to make all comparisons reversible, we do all comparisons
10032 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10033 all forms trapping and nontrapping comparisons, we can make inequality
10034 comparisons trapping again, since it results in better code when using
10035 FCOM based compares. */
10036 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10040 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10042 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10043 return ix86_fp_compare_mode (code);
10046 /* Only zero flag is needed. */
10047 case EQ: /* ZF=0 */
10048 case NE: /* ZF!=0 */
10050 /* Codes needing carry flag. */
10051 case GEU: /* CF=0 */
10052 case GTU: /* CF=0 & ZF=0 */
10053 case LTU: /* CF=1 */
10054 case LEU: /* CF=1 | ZF=1 */
10056 /* Codes possibly doable only with sign flag when
10057 comparing against zero. */
10058 case GE: /* SF=OF or SF=0 */
10059 case LT: /* SF<>OF or SF=1 */
10060 if (op1 == const0_rtx)
10063 /* For other cases Carry flag is not required. */
10065 /* Codes doable only with sign flag when comparing
10066 against zero, but we miss jump instruction for it
10067 so we need to use relational tests against overflow
10068 that thus needs to be zero. */
10069 case GT: /* ZF=0 & SF=OF */
10070 case LE: /* ZF=1 | SF<>OF */
10071 if (op1 == const0_rtx)
10075 /* strcmp pattern do (use flags) and combine may ask us for proper
10080 gcc_unreachable ();
10084 /* Return the fixed registers used for condition codes. */
10087 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10094 /* If two condition code modes are compatible, return a condition code
10095 mode which is compatible with both. Otherwise, return
10098 static enum machine_mode
10099 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10104 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10107 if ((m1 == CCGCmode && m2 == CCGOCmode)
10108 || (m1 == CCGOCmode && m2 == CCGCmode))
10114 gcc_unreachable ();
10136 /* These are only compatible with themselves, which we already
10142 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10145 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10147 enum rtx_code swapped_code = swap_condition (code);
10148 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10149 || (ix86_fp_comparison_cost (swapped_code)
10150 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10153 /* Swap, force into registers, or otherwise massage the two operands
10154 to a fp comparison. The operands are updated in place; the new
10155 comparison code is returned. */
10157 static enum rtx_code
10158 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10160 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10161 rtx op0 = *pop0, op1 = *pop1;
10162 enum machine_mode op_mode = GET_MODE (op0);
10163 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10165 /* All of the unordered compare instructions only work on registers.
10166 The same is true of the fcomi compare instructions. The XFmode
10167 compare instructions require registers except when comparing
10168 against zero or when converting operand 1 from fixed point to
10172 && (fpcmp_mode == CCFPUmode
10173 || (op_mode == XFmode
10174 && ! (standard_80387_constant_p (op0) == 1
10175 || standard_80387_constant_p (op1) == 1)
10176 && GET_CODE (op1) != FLOAT)
10177 || ix86_use_fcomi_compare (code)))
10179 op0 = force_reg (op_mode, op0);
10180 op1 = force_reg (op_mode, op1);
10184 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10185 things around if they appear profitable, otherwise force op0
10186 into a register. */
10188 if (standard_80387_constant_p (op0) == 0
10189 || (GET_CODE (op0) == MEM
10190 && ! (standard_80387_constant_p (op1) == 0
10191 || GET_CODE (op1) == MEM)))
10194 tmp = op0, op0 = op1, op1 = tmp;
10195 code = swap_condition (code);
10198 if (GET_CODE (op0) != REG)
10199 op0 = force_reg (op_mode, op0);
10201 if (CONSTANT_P (op1))
10203 int tmp = standard_80387_constant_p (op1);
10205 op1 = validize_mem (force_const_mem (op_mode, op1));
10209 op1 = force_reg (op_mode, op1);
10212 op1 = force_reg (op_mode, op1);
10216 /* Try to rearrange the comparison to make it cheaper. */
10217 if (ix86_fp_comparison_cost (code)
10218 > ix86_fp_comparison_cost (swap_condition (code))
10219 && (GET_CODE (op1) == REG || !no_new_pseudos))
10222 tmp = op0, op0 = op1, op1 = tmp;
10223 code = swap_condition (code);
10224 if (GET_CODE (op0) != REG)
10225 op0 = force_reg (op_mode, op0);
10233 /* Convert comparison codes we use to represent FP comparison to integer
10234 code that will result in proper branch. Return UNKNOWN if no such code
10238 ix86_fp_compare_code_to_integer (enum rtx_code code)
10267 /* Split comparison code CODE into comparisons we can do using branch
10268 instructions. BYPASS_CODE is comparison code for branch that will
10269 branch around FIRST_CODE and SECOND_CODE. If some of branches
10270 is not required, set value to UNKNOWN.
10271 We never require more than two branches. */
10274 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10275 enum rtx_code *first_code,
10276 enum rtx_code *second_code)
10278 *first_code = code;
10279 *bypass_code = UNKNOWN;
10280 *second_code = UNKNOWN;
10282 /* The fcomi comparison sets flags as follows:
10292 case GT: /* GTU - CF=0 & ZF=0 */
10293 case GE: /* GEU - CF=0 */
10294 case ORDERED: /* PF=0 */
10295 case UNORDERED: /* PF=1 */
10296 case UNEQ: /* EQ - ZF=1 */
10297 case UNLT: /* LTU - CF=1 */
10298 case UNLE: /* LEU - CF=1 | ZF=1 */
10299 case LTGT: /* EQ - ZF=0 */
10301 case LT: /* LTU - CF=1 - fails on unordered */
10302 *first_code = UNLT;
10303 *bypass_code = UNORDERED;
10305 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10306 *first_code = UNLE;
10307 *bypass_code = UNORDERED;
10309 case EQ: /* EQ - ZF=1 - fails on unordered */
10310 *first_code = UNEQ;
10311 *bypass_code = UNORDERED;
10313 case NE: /* NE - ZF=0 - fails on unordered */
10314 *first_code = LTGT;
10315 *second_code = UNORDERED;
10317 case UNGE: /* GEU - CF=0 - fails on unordered */
10319 *second_code = UNORDERED;
10321 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10323 *second_code = UNORDERED;
10326 gcc_unreachable ();
10328 if (!TARGET_IEEE_FP)
10330 *second_code = UNKNOWN;
10331 *bypass_code = UNKNOWN;
10335 /* Return cost of comparison done fcom + arithmetics operations on AX.
10336 All following functions do use number of instructions as a cost metrics.
10337 In future this should be tweaked to compute bytes for optimize_size and
10338 take into account performance of various instructions on various CPUs. */
10340 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10342 if (!TARGET_IEEE_FP)
10344 /* The cost of code output by ix86_expand_fp_compare. */
10368 gcc_unreachable ();
10372 /* Return cost of comparison done using fcomi operation.
10373 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10375 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10377 enum rtx_code bypass_code, first_code, second_code;
10378 /* Return arbitrarily high cost when instruction is not supported - this
10379 prevents gcc from using it. */
10382 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10383 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10386 /* Return cost of comparison done using sahf operation.
10387 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10389 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10391 enum rtx_code bypass_code, first_code, second_code;
10392 /* Return arbitrarily high cost when instruction is not preferred - this
10393 avoids gcc from using it. */
10394 if (!TARGET_USE_SAHF && !optimize_size)
10396 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10397 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10400 /* Compute cost of the comparison done using any method.
10401 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10403 ix86_fp_comparison_cost (enum rtx_code code)
10405 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10408 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10409 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10411 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10412 if (min > sahf_cost)
10414 if (min > fcomi_cost)
10419 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10422 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10423 rtx *second_test, rtx *bypass_test)
10425 enum machine_mode fpcmp_mode, intcmp_mode;
10427 int cost = ix86_fp_comparison_cost (code);
10428 enum rtx_code bypass_code, first_code, second_code;
10430 fpcmp_mode = ix86_fp_compare_mode (code);
10431 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10434 *second_test = NULL_RTX;
10436 *bypass_test = NULL_RTX;
10438 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10440 /* Do fcomi/sahf based test when profitable. */
10441 if ((bypass_code == UNKNOWN || bypass_test)
10442 && (second_code == UNKNOWN || second_test)
10443 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10447 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10448 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10454 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10455 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10457 scratch = gen_reg_rtx (HImode);
10458 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10459 emit_insn (gen_x86_sahf_1 (scratch));
10462 /* The FP codes work out to act like unsigned. */
10463 intcmp_mode = fpcmp_mode;
10465 if (bypass_code != UNKNOWN)
10466 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10467 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10469 if (second_code != UNKNOWN)
10470 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10471 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10476 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10477 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10478 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10480 scratch = gen_reg_rtx (HImode);
10481 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10483 /* In the unordered case, we have to check C2 for NaN's, which
10484 doesn't happen to work out to anything nice combination-wise.
10485 So do some bit twiddling on the value we've got in AH to come
10486 up with an appropriate set of condition codes. */
10488 intcmp_mode = CCNOmode;
10493 if (code == GT || !TARGET_IEEE_FP)
10495 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10500 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10501 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10502 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10503 intcmp_mode = CCmode;
10509 if (code == LT && TARGET_IEEE_FP)
10511 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10512 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10513 intcmp_mode = CCmode;
10518 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10524 if (code == GE || !TARGET_IEEE_FP)
10526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10532 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10539 if (code == LE && TARGET_IEEE_FP)
10541 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10542 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10543 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10544 intcmp_mode = CCmode;
10549 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10555 if (code == EQ && TARGET_IEEE_FP)
10557 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10558 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10559 intcmp_mode = CCmode;
10564 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10571 if (code == NE && TARGET_IEEE_FP)
10573 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10574 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10586 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10590 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10595 gcc_unreachable ();
10599 /* Return the test that should be put into the flags user, i.e.
10600 the bcc, scc, or cmov instruction. */
10601 return gen_rtx_fmt_ee (code, VOIDmode,
10602 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10607 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10610 op0 = ix86_compare_op0;
10611 op1 = ix86_compare_op1;
10614 *second_test = NULL_RTX;
10616 *bypass_test = NULL_RTX;
10618 if (ix86_compare_emitted)
10620 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10621 ix86_compare_emitted = NULL_RTX;
10623 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10624 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10625 second_test, bypass_test);
10627 ret = ix86_expand_int_compare (code, op0, op1);
10632 /* Return true if the CODE will result in nontrivial jump sequence. */
10634 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10636 enum rtx_code bypass_code, first_code, second_code;
10639 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10640 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10644 ix86_expand_branch (enum rtx_code code, rtx label)
10648 /* If we have emitted a compare insn, go straight to simple.
10649 ix86_expand_compare won't emit anything if ix86_compare_emitted
10651 if (ix86_compare_emitted)
10654 switch (GET_MODE (ix86_compare_op0))
10660 tmp = ix86_expand_compare (code, NULL, NULL);
10661 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10662 gen_rtx_LABEL_REF (VOIDmode, label),
10664 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10673 enum rtx_code bypass_code, first_code, second_code;
10675 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10676 &ix86_compare_op1);
10678 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10680 /* Check whether we will use the natural sequence with one jump. If
10681 so, we can expand jump early. Otherwise delay expansion by
10682 creating compound insn to not confuse optimizers. */
10683 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10686 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10687 gen_rtx_LABEL_REF (VOIDmode, label),
10688 pc_rtx, NULL_RTX, NULL_RTX);
10692 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10693 ix86_compare_op0, ix86_compare_op1);
10694 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10695 gen_rtx_LABEL_REF (VOIDmode, label),
10697 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10699 use_fcomi = ix86_use_fcomi_compare (code);
10700 vec = rtvec_alloc (3 + !use_fcomi);
10701 RTVEC_ELT (vec, 0) = tmp;
10703 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10705 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10708 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10710 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10719 /* Expand DImode branch into multiple compare+branch. */
10721 rtx lo[2], hi[2], label2;
10722 enum rtx_code code1, code2, code3;
10723 enum machine_mode submode;
10725 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10727 tmp = ix86_compare_op0;
10728 ix86_compare_op0 = ix86_compare_op1;
10729 ix86_compare_op1 = tmp;
10730 code = swap_condition (code);
10732 if (GET_MODE (ix86_compare_op0) == DImode)
10734 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10735 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10740 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10741 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10745 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10746 avoid two branches. This costs one extra insn, so disable when
10747 optimizing for size. */
10749 if ((code == EQ || code == NE)
10751 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10756 if (hi[1] != const0_rtx)
10757 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10758 NULL_RTX, 0, OPTAB_WIDEN);
10761 if (lo[1] != const0_rtx)
10762 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10763 NULL_RTX, 0, OPTAB_WIDEN);
10765 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10766 NULL_RTX, 0, OPTAB_WIDEN);
10768 ix86_compare_op0 = tmp;
10769 ix86_compare_op1 = const0_rtx;
10770 ix86_expand_branch (code, label);
10774 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10775 op1 is a constant and the low word is zero, then we can just
10776 examine the high word. */
10778 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10781 case LT: case LTU: case GE: case GEU:
10782 ix86_compare_op0 = hi[0];
10783 ix86_compare_op1 = hi[1];
10784 ix86_expand_branch (code, label);
10790 /* Otherwise, we need two or three jumps. */
10792 label2 = gen_label_rtx ();
10795 code2 = swap_condition (code);
10796 code3 = unsigned_condition (code);
10800 case LT: case GT: case LTU: case GTU:
10803 case LE: code1 = LT; code2 = GT; break;
10804 case GE: code1 = GT; code2 = LT; break;
10805 case LEU: code1 = LTU; code2 = GTU; break;
10806 case GEU: code1 = GTU; code2 = LTU; break;
10808 case EQ: code1 = UNKNOWN; code2 = NE; break;
10809 case NE: code2 = UNKNOWN; break;
10812 gcc_unreachable ();
10817 * if (hi(a) < hi(b)) goto true;
10818 * if (hi(a) > hi(b)) goto false;
10819 * if (lo(a) < lo(b)) goto true;
10823 ix86_compare_op0 = hi[0];
10824 ix86_compare_op1 = hi[1];
10826 if (code1 != UNKNOWN)
10827 ix86_expand_branch (code1, label);
10828 if (code2 != UNKNOWN)
10829 ix86_expand_branch (code2, label2);
10831 ix86_compare_op0 = lo[0];
10832 ix86_compare_op1 = lo[1];
10833 ix86_expand_branch (code3, label);
10835 if (code2 != UNKNOWN)
10836 emit_label (label2);
10841 gcc_unreachable ();
10845 /* Split branch based on floating point condition. */
10847 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10848 rtx target1, rtx target2, rtx tmp, rtx pushed)
10850 rtx second, bypass;
10851 rtx label = NULL_RTX;
10853 int bypass_probability = -1, second_probability = -1, probability = -1;
10856 if (target2 != pc_rtx)
10859 code = reverse_condition_maybe_unordered (code);
10864 condition = ix86_expand_fp_compare (code, op1, op2,
10865 tmp, &second, &bypass);
10867 /* Remove pushed operand from stack. */
10869 ix86_free_from_memory (GET_MODE (pushed));
10871 if (split_branch_probability >= 0)
10873 /* Distribute the probabilities across the jumps.
10874 Assume the BYPASS and SECOND to be always test
10876 probability = split_branch_probability;
10878 /* Value of 1 is low enough to make no need for probability
10879 to be updated. Later we may run some experiments and see
10880 if unordered values are more frequent in practice. */
10882 bypass_probability = 1;
10884 second_probability = 1;
10886 if (bypass != NULL_RTX)
10888 label = gen_label_rtx ();
10889 i = emit_jump_insn (gen_rtx_SET
10891 gen_rtx_IF_THEN_ELSE (VOIDmode,
10893 gen_rtx_LABEL_REF (VOIDmode,
10896 if (bypass_probability >= 0)
10898 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10899 GEN_INT (bypass_probability),
10902 i = emit_jump_insn (gen_rtx_SET
10904 gen_rtx_IF_THEN_ELSE (VOIDmode,
10905 condition, target1, target2)));
10906 if (probability >= 0)
10908 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10909 GEN_INT (probability),
10911 if (second != NULL_RTX)
10913 i = emit_jump_insn (gen_rtx_SET
10915 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10917 if (second_probability >= 0)
10919 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10920 GEN_INT (second_probability),
10923 if (label != NULL_RTX)
10924 emit_label (label);
10928 ix86_expand_setcc (enum rtx_code code, rtx dest)
10930 rtx ret, tmp, tmpreg, equiv;
10931 rtx second_test, bypass_test;
10933 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10934 return 0; /* FAIL */
10936 gcc_assert (GET_MODE (dest) == QImode);
10938 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10939 PUT_MODE (ret, QImode);
10944 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10945 if (bypass_test || second_test)
10947 rtx test = second_test;
10949 rtx tmp2 = gen_reg_rtx (QImode);
10952 gcc_assert (!second_test);
10953 test = bypass_test;
10955 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10957 PUT_MODE (test, QImode);
10958 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10961 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10963 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10966 /* Attach a REG_EQUAL note describing the comparison result. */
10967 if (ix86_compare_op0 && ix86_compare_op1)
10969 equiv = simplify_gen_relational (code, QImode,
10970 GET_MODE (ix86_compare_op0),
10971 ix86_compare_op0, ix86_compare_op1);
10972 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10975 return 1; /* DONE */
10978 /* Expand comparison setting or clearing carry flag. Return true when
10979 successful and set pop for the operation. */
10981 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10983 enum machine_mode mode =
10984 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10986 /* Do not handle DImode compares that go through special path. Also we can't
10987 deal with FP compares yet. This is possible to add. */
10988 if (mode == (TARGET_64BIT ? TImode : DImode))
10990 if (FLOAT_MODE_P (mode))
10992 rtx second_test = NULL, bypass_test = NULL;
10993 rtx compare_op, compare_seq;
10995 /* Shortcut: following common codes never translate into carry flag compares. */
10996 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10997 || code == ORDERED || code == UNORDERED)
11000 /* These comparisons require zero flag; swap operands so they won't. */
11001 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11002 && !TARGET_IEEE_FP)
11007 code = swap_condition (code);
11010 /* Try to expand the comparison and verify that we end up with carry flag
11011 based comparison. This is fails to be true only when we decide to expand
11012 comparison using arithmetic that is not too common scenario. */
11014 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11015 &second_test, &bypass_test);
11016 compare_seq = get_insns ();
11019 if (second_test || bypass_test)
11021 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11022 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11023 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11025 code = GET_CODE (compare_op);
11026 if (code != LTU && code != GEU)
11028 emit_insn (compare_seq);
11032 if (!INTEGRAL_MODE_P (mode))
11040 /* Convert a==0 into (unsigned)a<1. */
11043 if (op1 != const0_rtx)
11046 code = (code == EQ ? LTU : GEU);
11049 /* Convert a>b into b<a or a>=b-1. */
11052 if (GET_CODE (op1) == CONST_INT)
11054 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11055 /* Bail out on overflow. We still can swap operands but that
11056 would force loading of the constant into register. */
11057 if (op1 == const0_rtx
11058 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11060 code = (code == GTU ? GEU : LTU);
11067 code = (code == GTU ? LTU : GEU);
11071 /* Convert a>=0 into (unsigned)a<0x80000000. */
11074 if (mode == DImode || op1 != const0_rtx)
11076 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11077 code = (code == LT ? GEU : LTU);
11081 if (mode == DImode || op1 != constm1_rtx)
11083 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11084 code = (code == LE ? GEU : LTU);
11090 /* Swapping operands may cause constant to appear as first operand. */
11091 if (!nonimmediate_operand (op0, VOIDmode))
11093 if (no_new_pseudos)
11095 op0 = force_reg (mode, op0);
11097 ix86_compare_op0 = op0;
11098 ix86_compare_op1 = op1;
11099 *pop = ix86_expand_compare (code, NULL, NULL);
11100 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11105 ix86_expand_int_movcc (rtx operands[])
11107 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11108 rtx compare_seq, compare_op;
11109 rtx second_test, bypass_test;
11110 enum machine_mode mode = GET_MODE (operands[0]);
11111 bool sign_bit_compare_p = false;;
11114 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11115 compare_seq = get_insns ();
11118 compare_code = GET_CODE (compare_op);
11120 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11121 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11122 sign_bit_compare_p = true;
11124 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11125 HImode insns, we'd be swallowed in word prefix ops. */
11127 if ((mode != HImode || TARGET_FAST_PREFIX)
11128 && (mode != (TARGET_64BIT ? TImode : DImode))
11129 && GET_CODE (operands[2]) == CONST_INT
11130 && GET_CODE (operands[3]) == CONST_INT)
11132 rtx out = operands[0];
11133 HOST_WIDE_INT ct = INTVAL (operands[2]);
11134 HOST_WIDE_INT cf = INTVAL (operands[3]);
11135 HOST_WIDE_INT diff;
11138 /* Sign bit compares are better done using shifts than we do by using
11140 if (sign_bit_compare_p
11141 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11142 ix86_compare_op1, &compare_op))
11144 /* Detect overlap between destination and compare sources. */
11147 if (!sign_bit_compare_p)
11149 bool fpcmp = false;
11151 compare_code = GET_CODE (compare_op);
11153 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11154 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11157 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11160 /* To simplify rest of code, restrict to the GEU case. */
11161 if (compare_code == LTU)
11163 HOST_WIDE_INT tmp = ct;
11166 compare_code = reverse_condition (compare_code);
11167 code = reverse_condition (code);
11172 PUT_CODE (compare_op,
11173 reverse_condition_maybe_unordered
11174 (GET_CODE (compare_op)));
11176 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11180 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11181 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11182 tmp = gen_reg_rtx (mode);
11184 if (mode == DImode)
11185 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11187 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11191 if (code == GT || code == GE)
11192 code = reverse_condition (code);
11195 HOST_WIDE_INT tmp = ct;
11200 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11201 ix86_compare_op1, VOIDmode, 0, -1);
11214 tmp = expand_simple_binop (mode, PLUS,
11216 copy_rtx (tmp), 1, OPTAB_DIRECT);
11227 tmp = expand_simple_binop (mode, IOR,
11229 copy_rtx (tmp), 1, OPTAB_DIRECT);
11231 else if (diff == -1 && ct)
11241 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11243 tmp = expand_simple_binop (mode, PLUS,
11244 copy_rtx (tmp), GEN_INT (cf),
11245 copy_rtx (tmp), 1, OPTAB_DIRECT);
11253 * andl cf - ct, dest
11263 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11266 tmp = expand_simple_binop (mode, AND,
11268 gen_int_mode (cf - ct, mode),
11269 copy_rtx (tmp), 1, OPTAB_DIRECT);
11271 tmp = expand_simple_binop (mode, PLUS,
11272 copy_rtx (tmp), GEN_INT (ct),
11273 copy_rtx (tmp), 1, OPTAB_DIRECT);
11276 if (!rtx_equal_p (tmp, out))
11277 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11279 return 1; /* DONE */
11285 tmp = ct, ct = cf, cf = tmp;
11287 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11289 /* We may be reversing unordered compare to normal compare, that
11290 is not valid in general (we may convert non-trapping condition
11291 to trapping one), however on i386 we currently emit all
11292 comparisons unordered. */
11293 compare_code = reverse_condition_maybe_unordered (compare_code);
11294 code = reverse_condition_maybe_unordered (code);
11298 compare_code = reverse_condition (compare_code);
11299 code = reverse_condition (code);
11303 compare_code = UNKNOWN;
11304 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11305 && GET_CODE (ix86_compare_op1) == CONST_INT)
11307 if (ix86_compare_op1 == const0_rtx
11308 && (code == LT || code == GE))
11309 compare_code = code;
11310 else if (ix86_compare_op1 == constm1_rtx)
11314 else if (code == GT)
11319 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11320 if (compare_code != UNKNOWN
11321 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11322 && (cf == -1 || ct == -1))
11324 /* If lea code below could be used, only optimize
11325 if it results in a 2 insn sequence. */
11327 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11328 || diff == 3 || diff == 5 || diff == 9)
11329 || (compare_code == LT && ct == -1)
11330 || (compare_code == GE && cf == -1))
11333 * notl op1 (if necessary)
11341 code = reverse_condition (code);
11344 out = emit_store_flag (out, code, ix86_compare_op0,
11345 ix86_compare_op1, VOIDmode, 0, -1);
11347 out = expand_simple_binop (mode, IOR,
11349 out, 1, OPTAB_DIRECT);
11350 if (out != operands[0])
11351 emit_move_insn (operands[0], out);
11353 return 1; /* DONE */
11358 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11359 || diff == 3 || diff == 5 || diff == 9)
11360 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11362 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11368 * lea cf(dest*(ct-cf)),dest
11372 * This also catches the degenerate setcc-only case.
11378 out = emit_store_flag (out, code, ix86_compare_op0,
11379 ix86_compare_op1, VOIDmode, 0, 1);
11382 /* On x86_64 the lea instruction operates on Pmode, so we need
11383 to get arithmetics done in proper mode to match. */
11385 tmp = copy_rtx (out);
11389 out1 = copy_rtx (out);
11390 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11394 tmp = gen_rtx_PLUS (mode, tmp, out1);
11400 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11403 if (!rtx_equal_p (tmp, out))
11406 out = force_operand (tmp, copy_rtx (out));
11408 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11410 if (!rtx_equal_p (out, operands[0]))
11411 emit_move_insn (operands[0], copy_rtx (out));
11413 return 1; /* DONE */
11417 * General case: Jumpful:
11418 * xorl dest,dest cmpl op1, op2
11419 * cmpl op1, op2 movl ct, dest
11420 * setcc dest jcc 1f
11421 * decl dest movl cf, dest
11422 * andl (cf-ct),dest 1:
11425 * Size 20. Size 14.
11427 * This is reasonably steep, but branch mispredict costs are
11428 * high on modern cpus, so consider failing only if optimizing
11432 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11433 && BRANCH_COST >= 2)
11439 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11440 /* We may be reversing unordered compare to normal compare,
11441 that is not valid in general (we may convert non-trapping
11442 condition to trapping one), however on i386 we currently
11443 emit all comparisons unordered. */
11444 code = reverse_condition_maybe_unordered (code);
11447 code = reverse_condition (code);
11448 if (compare_code != UNKNOWN)
11449 compare_code = reverse_condition (compare_code);
11453 if (compare_code != UNKNOWN)
11455 /* notl op1 (if needed)
11460 For x < 0 (resp. x <= -1) there will be no notl,
11461 so if possible swap the constants to get rid of the
11463 True/false will be -1/0 while code below (store flag
11464 followed by decrement) is 0/-1, so the constants need
11465 to be exchanged once more. */
11467 if (compare_code == GE || !cf)
11469 code = reverse_condition (code);
11474 HOST_WIDE_INT tmp = cf;
11479 out = emit_store_flag (out, code, ix86_compare_op0,
11480 ix86_compare_op1, VOIDmode, 0, -1);
11484 out = emit_store_flag (out, code, ix86_compare_op0,
11485 ix86_compare_op1, VOIDmode, 0, 1);
11487 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11488 copy_rtx (out), 1, OPTAB_DIRECT);
11491 out = expand_simple_binop (mode, AND, copy_rtx (out),
11492 gen_int_mode (cf - ct, mode),
11493 copy_rtx (out), 1, OPTAB_DIRECT);
11495 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11496 copy_rtx (out), 1, OPTAB_DIRECT);
11497 if (!rtx_equal_p (out, operands[0]))
11498 emit_move_insn (operands[0], copy_rtx (out));
11500 return 1; /* DONE */
11504 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11506 /* Try a few things more with specific constants and a variable. */
11509 rtx var, orig_out, out, tmp;
11511 if (BRANCH_COST <= 2)
11512 return 0; /* FAIL */
11514 /* If one of the two operands is an interesting constant, load a
11515 constant with the above and mask it in with a logical operation. */
11517 if (GET_CODE (operands[2]) == CONST_INT)
11520 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11521 operands[3] = constm1_rtx, op = and_optab;
11522 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11523 operands[3] = const0_rtx, op = ior_optab;
11525 return 0; /* FAIL */
11527 else if (GET_CODE (operands[3]) == CONST_INT)
11530 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11531 operands[2] = constm1_rtx, op = and_optab;
11532 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11533 operands[2] = const0_rtx, op = ior_optab;
11535 return 0; /* FAIL */
11538 return 0; /* FAIL */
11540 orig_out = operands[0];
11541 tmp = gen_reg_rtx (mode);
11544 /* Recurse to get the constant loaded. */
11545 if (ix86_expand_int_movcc (operands) == 0)
11546 return 0; /* FAIL */
11548 /* Mask in the interesting variable. */
11549 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11551 if (!rtx_equal_p (out, orig_out))
11552 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11554 return 1; /* DONE */
11558 * For comparison with above,
11568 if (! nonimmediate_operand (operands[2], mode))
11569 operands[2] = force_reg (mode, operands[2]);
11570 if (! nonimmediate_operand (operands[3], mode))
11571 operands[3] = force_reg (mode, operands[3]);
11573 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11575 rtx tmp = gen_reg_rtx (mode);
11576 emit_move_insn (tmp, operands[3]);
11579 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11581 rtx tmp = gen_reg_rtx (mode);
11582 emit_move_insn (tmp, operands[2]);
11586 if (! register_operand (operands[2], VOIDmode)
11588 || ! register_operand (operands[3], VOIDmode)))
11589 operands[2] = force_reg (mode, operands[2]);
11592 && ! register_operand (operands[3], VOIDmode))
11593 operands[3] = force_reg (mode, operands[3]);
11595 emit_insn (compare_seq);
11596 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11597 gen_rtx_IF_THEN_ELSE (mode,
11598 compare_op, operands[2],
11601 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11602 gen_rtx_IF_THEN_ELSE (mode,
11604 copy_rtx (operands[3]),
11605 copy_rtx (operands[0]))));
11607 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11608 gen_rtx_IF_THEN_ELSE (mode,
11610 copy_rtx (operands[2]),
11611 copy_rtx (operands[0]))));
11613 return 1; /* DONE */
11616 /* Swap, force into registers, or otherwise massage the two operands
11617 to an sse comparison with a mask result. Thus we differ a bit from
11618 ix86_prepare_fp_compare_args which expects to produce a flags result.
11620 The DEST operand exists to help determine whether to commute commutative
11621 operators. The POP0/POP1 operands are updated in place. The new
11622 comparison code is returned, or UNKNOWN if not implementable. */
11624 static enum rtx_code
11625 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11626 rtx *pop0, rtx *pop1)
11634 /* We have no LTGT as an operator. We could implement it with
11635 NE & ORDERED, but this requires an extra temporary. It's
11636 not clear that it's worth it. */
11643 /* These are supported directly. */
11650 /* For commutative operators, try to canonicalize the destination
11651 operand to be first in the comparison - this helps reload to
11652 avoid extra moves. */
11653 if (!dest || !rtx_equal_p (dest, *pop1))
11661 /* These are not supported directly. Swap the comparison operands
11662 to transform into something that is supported. */
11666 code = swap_condition (code);
11670 gcc_unreachable ();
11676 /* Detect conditional moves that exactly match min/max operational
11677 semantics. Note that this is IEEE safe, as long as we don't
11678 interchange the operands.
11680 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11681 and TRUE if the operation is successful and instructions are emitted. */
11684 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11685 rtx cmp_op1, rtx if_true, rtx if_false)
11687 enum machine_mode mode;
11693 else if (code == UNGE)
11696 if_true = if_false;
11702 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11704 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11709 mode = GET_MODE (dest);
11711 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11712 but MODE may be a vector mode and thus not appropriate. */
11713 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11715 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11718 if_true = force_reg (mode, if_true);
11719 v = gen_rtvec (2, if_true, if_false);
11720 tmp = gen_rtx_UNSPEC (mode, v, u);
11724 code = is_min ? SMIN : SMAX;
11725 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11728 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11732 /* Expand an sse vector comparison. Return the register with the result. */
11735 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11736 rtx op_true, rtx op_false)
11738 enum machine_mode mode = GET_MODE (dest);
11741 cmp_op0 = force_reg (mode, cmp_op0);
11742 if (!nonimmediate_operand (cmp_op1, mode))
11743 cmp_op1 = force_reg (mode, cmp_op1);
11746 || reg_overlap_mentioned_p (dest, op_true)
11747 || reg_overlap_mentioned_p (dest, op_false))
11748 dest = gen_reg_rtx (mode);
11750 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11751 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11756 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11757 operations. This is used for both scalar and vector conditional moves. */
11760 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11762 enum machine_mode mode = GET_MODE (dest);
11765 if (op_false == CONST0_RTX (mode))
11767 op_true = force_reg (mode, op_true);
11768 x = gen_rtx_AND (mode, cmp, op_true);
11769 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11771 else if (op_true == CONST0_RTX (mode))
11773 op_false = force_reg (mode, op_false);
11774 x = gen_rtx_NOT (mode, cmp);
11775 x = gen_rtx_AND (mode, x, op_false);
11776 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11780 op_true = force_reg (mode, op_true);
11781 op_false = force_reg (mode, op_false);
11783 t2 = gen_reg_rtx (mode);
11785 t3 = gen_reg_rtx (mode);
11789 x = gen_rtx_AND (mode, op_true, cmp);
11790 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11792 x = gen_rtx_NOT (mode, cmp);
11793 x = gen_rtx_AND (mode, x, op_false);
11794 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11796 x = gen_rtx_IOR (mode, t3, t2);
11797 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11801 /* Expand a floating-point conditional move. Return true if successful. */
11804 ix86_expand_fp_movcc (rtx operands[])
11806 enum machine_mode mode = GET_MODE (operands[0]);
11807 enum rtx_code code = GET_CODE (operands[1]);
11808 rtx tmp, compare_op, second_test, bypass_test;
11810 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11812 enum machine_mode cmode;
11814 /* Since we've no cmove for sse registers, don't force bad register
11815 allocation just to gain access to it. Deny movcc when the
11816 comparison mode doesn't match the move mode. */
11817 cmode = GET_MODE (ix86_compare_op0);
11818 if (cmode == VOIDmode)
11819 cmode = GET_MODE (ix86_compare_op1);
11823 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11825 &ix86_compare_op1);
11826 if (code == UNKNOWN)
11829 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11830 ix86_compare_op1, operands[2],
11834 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11835 ix86_compare_op1, operands[2], operands[3]);
11836 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11840 /* The floating point conditional move instructions don't directly
11841 support conditions resulting from a signed integer comparison. */
11843 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11845 /* The floating point conditional move instructions don't directly
11846 support signed integer comparisons. */
11848 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11850 gcc_assert (!second_test && !bypass_test);
11851 tmp = gen_reg_rtx (QImode);
11852 ix86_expand_setcc (code, tmp);
11854 ix86_compare_op0 = tmp;
11855 ix86_compare_op1 = const0_rtx;
11856 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11858 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11860 tmp = gen_reg_rtx (mode);
11861 emit_move_insn (tmp, operands[3]);
11864 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11866 tmp = gen_reg_rtx (mode);
11867 emit_move_insn (tmp, operands[2]);
11871 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11872 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11873 operands[2], operands[3])));
11875 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11876 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11877 operands[3], operands[0])));
11879 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11880 gen_rtx_IF_THEN_ELSE (mode, second_test,
11881 operands[2], operands[0])));
11886 /* Expand a floating-point vector conditional move; a vcond operation
11887 rather than a movcc operation. */
11890 ix86_expand_fp_vcond (rtx operands[])
11892 enum rtx_code code = GET_CODE (operands[3]);
11895 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11896 &operands[4], &operands[5]);
11897 if (code == UNKNOWN)
11900 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11901 operands[5], operands[1], operands[2]))
11904 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11905 operands[1], operands[2]);
11906 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11910 /* Expand a signed integral vector conditional move. */
11913 ix86_expand_int_vcond (rtx operands[])
11915 enum machine_mode mode = GET_MODE (operands[0]);
11916 enum rtx_code code = GET_CODE (operands[3]);
11917 bool negate = false;
11920 cop0 = operands[4];
11921 cop1 = operands[5];
11923 /* Canonicalize the comparison to EQ, GT, GTU. */
11934 code = reverse_condition (code);
11940 code = reverse_condition (code);
11946 code = swap_condition (code);
11947 x = cop0, cop0 = cop1, cop1 = x;
11951 gcc_unreachable ();
11954 /* Unsigned parallel compare is not supported by the hardware. Play some
11955 tricks to turn this into a signed comparison against 0. */
11958 cop0 = force_reg (mode, cop0);
11966 /* Perform a parallel modulo subtraction. */
11967 t1 = gen_reg_rtx (mode);
11968 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11970 /* Extract the original sign bit of op0. */
11971 mask = GEN_INT (-0x80000000);
11972 mask = gen_rtx_CONST_VECTOR (mode,
11973 gen_rtvec (4, mask, mask, mask, mask));
11974 mask = force_reg (mode, mask);
11975 t2 = gen_reg_rtx (mode);
11976 emit_insn (gen_andv4si3 (t2, cop0, mask));
11978 /* XOR it back into the result of the subtraction. This results
11979 in the sign bit set iff we saw unsigned underflow. */
11980 x = gen_reg_rtx (mode);
11981 emit_insn (gen_xorv4si3 (x, t1, t2));
11989 /* Perform a parallel unsigned saturating subtraction. */
11990 x = gen_reg_rtx (mode);
11991 emit_insn (gen_rtx_SET (VOIDmode, x,
11992 gen_rtx_US_MINUS (mode, cop0, cop1)));
11999 gcc_unreachable ();
12003 cop1 = CONST0_RTX (mode);
12006 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12007 operands[1+negate], operands[2-negate]);
12009 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12010 operands[2-negate]);
12014 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12015 true if we should do zero extension, else sign extension. HIGH_P is
12016 true if we want the N/2 high elements, else the low elements. */
12019 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12021 enum machine_mode imode = GET_MODE (operands[1]);
12022 rtx (*unpack)(rtx, rtx, rtx);
12029 unpack = gen_vec_interleave_highv16qi;
12031 unpack = gen_vec_interleave_lowv16qi;
12035 unpack = gen_vec_interleave_highv8hi;
12037 unpack = gen_vec_interleave_lowv8hi;
12041 unpack = gen_vec_interleave_highv4si;
12043 unpack = gen_vec_interleave_lowv4si;
12046 gcc_unreachable ();
12049 dest = gen_lowpart (imode, operands[0]);
12052 se = force_reg (imode, CONST0_RTX (imode));
12054 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12055 operands[1], pc_rtx, pc_rtx);
12057 emit_insn (unpack (dest, operands[1], se));
12060 /* Expand conditional increment or decrement using adb/sbb instructions.
12061 The default case using setcc followed by the conditional move can be
12062 done by generic code. */
12064 ix86_expand_int_addcc (rtx operands[])
12066 enum rtx_code code = GET_CODE (operands[1]);
12068 rtx val = const0_rtx;
12069 bool fpcmp = false;
12070 enum machine_mode mode = GET_MODE (operands[0]);
12072 if (operands[3] != const1_rtx
12073 && operands[3] != constm1_rtx)
12075 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12076 ix86_compare_op1, &compare_op))
12078 code = GET_CODE (compare_op);
12080 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12081 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12084 code = ix86_fp_compare_code_to_integer (code);
12091 PUT_CODE (compare_op,
12092 reverse_condition_maybe_unordered
12093 (GET_CODE (compare_op)));
12095 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12097 PUT_MODE (compare_op, mode);
12099 /* Construct either adc or sbb insn. */
12100 if ((code == LTU) == (operands[3] == constm1_rtx))
12102 switch (GET_MODE (operands[0]))
12105 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12108 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12111 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12114 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12117 gcc_unreachable ();
12122 switch (GET_MODE (operands[0]))
12125 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12128 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12131 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12134 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12137 gcc_unreachable ();
12140 return 1; /* DONE */
12144 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12145 works for floating pointer parameters and nonoffsetable memories.
12146 For pushes, it returns just stack offsets; the values will be saved
12147 in the right order. Maximally three parts are generated. */
12150 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12155 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12157 size = (GET_MODE_SIZE (mode) + 4) / 8;
12159 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12160 gcc_assert (size >= 2 && size <= 3);
12162 /* Optimize constant pool reference to immediates. This is used by fp
12163 moves, that force all constants to memory to allow combining. */
12164 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12166 rtx tmp = maybe_get_pool_constant (operand);
12171 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12173 /* The only non-offsetable memories we handle are pushes. */
12174 int ok = push_operand (operand, VOIDmode);
12178 operand = copy_rtx (operand);
12179 PUT_MODE (operand, Pmode);
12180 parts[0] = parts[1] = parts[2] = operand;
12184 if (GET_CODE (operand) == CONST_VECTOR)
12186 enum machine_mode imode = int_mode_for_mode (mode);
12187 /* Caution: if we looked through a constant pool memory above,
12188 the operand may actually have a different mode now. That's
12189 ok, since we want to pun this all the way back to an integer. */
12190 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12191 gcc_assert (operand != NULL);
12197 if (mode == DImode)
12198 split_di (&operand, 1, &parts[0], &parts[1]);
12201 if (REG_P (operand))
12203 gcc_assert (reload_completed);
12204 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12205 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12207 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12209 else if (offsettable_memref_p (operand))
12211 operand = adjust_address (operand, SImode, 0);
12212 parts[0] = operand;
12213 parts[1] = adjust_address (operand, SImode, 4);
12215 parts[2] = adjust_address (operand, SImode, 8);
12217 else if (GET_CODE (operand) == CONST_DOUBLE)
12222 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12226 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12227 parts[2] = gen_int_mode (l[2], SImode);
12230 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12233 gcc_unreachable ();
12235 parts[1] = gen_int_mode (l[1], SImode);
12236 parts[0] = gen_int_mode (l[0], SImode);
12239 gcc_unreachable ();
12244 if (mode == TImode)
12245 split_ti (&operand, 1, &parts[0], &parts[1]);
12246 if (mode == XFmode || mode == TFmode)
12248 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12249 if (REG_P (operand))
12251 gcc_assert (reload_completed);
12252 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12253 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12255 else if (offsettable_memref_p (operand))
12257 operand = adjust_address (operand, DImode, 0);
12258 parts[0] = operand;
12259 parts[1] = adjust_address (operand, upper_mode, 8);
12261 else if (GET_CODE (operand) == CONST_DOUBLE)
12266 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12267 real_to_target (l, &r, mode);
12269 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12270 if (HOST_BITS_PER_WIDE_INT >= 64)
12273 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12274 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12277 parts[0] = immed_double_const (l[0], l[1], DImode);
12279 if (upper_mode == SImode)
12280 parts[1] = gen_int_mode (l[2], SImode);
12281 else if (HOST_BITS_PER_WIDE_INT >= 64)
12284 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12285 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12288 parts[1] = immed_double_const (l[2], l[3], DImode);
12291 gcc_unreachable ();
12298 /* Emit insns to perform a move or push of DI, DF, and XF values.
12299 Return false when normal moves are needed; true when all required
12300 insns have been emitted. Operands 2-4 contain the input values
12301 int the correct order; operands 5-7 contain the output values. */
12304 ix86_split_long_move (rtx operands[])
12309 int collisions = 0;
12310 enum machine_mode mode = GET_MODE (operands[0]);
12312 /* The DFmode expanders may ask us to move double.
12313 For 64bit target this is single move. By hiding the fact
12314 here we simplify i386.md splitters. */
12315 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12317 /* Optimize constant pool reference to immediates. This is used by
12318 fp moves, that force all constants to memory to allow combining. */
12320 if (GET_CODE (operands[1]) == MEM
12321 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12322 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12323 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12324 if (push_operand (operands[0], VOIDmode))
12326 operands[0] = copy_rtx (operands[0]);
12327 PUT_MODE (operands[0], Pmode);
12330 operands[0] = gen_lowpart (DImode, operands[0]);
12331 operands[1] = gen_lowpart (DImode, operands[1]);
12332 emit_move_insn (operands[0], operands[1]);
12336 /* The only non-offsettable memory we handle is push. */
12337 if (push_operand (operands[0], VOIDmode))
12340 gcc_assert (GET_CODE (operands[0]) != MEM
12341 || offsettable_memref_p (operands[0]));
12343 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12344 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12346 /* When emitting push, take care for source operands on the stack. */
12347 if (push && GET_CODE (operands[1]) == MEM
12348 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12351 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12352 XEXP (part[1][2], 0));
12353 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12354 XEXP (part[1][1], 0));
12357 /* We need to do copy in the right order in case an address register
12358 of the source overlaps the destination. */
12359 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12361 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12363 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12366 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12369 /* Collision in the middle part can be handled by reordering. */
12370 if (collisions == 1 && nparts == 3
12371 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12374 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12375 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12378 /* If there are more collisions, we can't handle it by reordering.
12379 Do an lea to the last part and use only one colliding move. */
12380 else if (collisions > 1)
12386 base = part[0][nparts - 1];
12388 /* Handle the case when the last part isn't valid for lea.
12389 Happens in 64-bit mode storing the 12-byte XFmode. */
12390 if (GET_MODE (base) != Pmode)
12391 base = gen_rtx_REG (Pmode, REGNO (base));
12393 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12394 part[1][0] = replace_equiv_address (part[1][0], base);
12395 part[1][1] = replace_equiv_address (part[1][1],
12396 plus_constant (base, UNITS_PER_WORD));
12398 part[1][2] = replace_equiv_address (part[1][2],
12399 plus_constant (base, 8));
12409 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12410 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12411 emit_move_insn (part[0][2], part[1][2]);
12416 /* In 64bit mode we don't have 32bit push available. In case this is
12417 register, it is OK - we will just use larger counterpart. We also
12418 retype memory - these comes from attempt to avoid REX prefix on
12419 moving of second half of TFmode value. */
12420 if (GET_MODE (part[1][1]) == SImode)
12422 switch (GET_CODE (part[1][1]))
12425 part[1][1] = adjust_address (part[1][1], DImode, 0);
12429 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12433 gcc_unreachable ();
12436 if (GET_MODE (part[1][0]) == SImode)
12437 part[1][0] = part[1][1];
12440 emit_move_insn (part[0][1], part[1][1]);
12441 emit_move_insn (part[0][0], part[1][0]);
12445 /* Choose correct order to not overwrite the source before it is copied. */
12446 if ((REG_P (part[0][0])
12447 && REG_P (part[1][1])
12448 && (REGNO (part[0][0]) == REGNO (part[1][1])
12450 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12452 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12456 operands[2] = part[0][2];
12457 operands[3] = part[0][1];
12458 operands[4] = part[0][0];
12459 operands[5] = part[1][2];
12460 operands[6] = part[1][1];
12461 operands[7] = part[1][0];
12465 operands[2] = part[0][1];
12466 operands[3] = part[0][0];
12467 operands[5] = part[1][1];
12468 operands[6] = part[1][0];
12475 operands[2] = part[0][0];
12476 operands[3] = part[0][1];
12477 operands[4] = part[0][2];
12478 operands[5] = part[1][0];
12479 operands[6] = part[1][1];
12480 operands[7] = part[1][2];
12484 operands[2] = part[0][0];
12485 operands[3] = part[0][1];
12486 operands[5] = part[1][0];
12487 operands[6] = part[1][1];
12491 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12494 if (GET_CODE (operands[5]) == CONST_INT
12495 && operands[5] != const0_rtx
12496 && REG_P (operands[2]))
12498 if (GET_CODE (operands[6]) == CONST_INT
12499 && INTVAL (operands[6]) == INTVAL (operands[5]))
12500 operands[6] = operands[2];
12503 && GET_CODE (operands[7]) == CONST_INT
12504 && INTVAL (operands[7]) == INTVAL (operands[5]))
12505 operands[7] = operands[2];
12509 && GET_CODE (operands[6]) == CONST_INT
12510 && operands[6] != const0_rtx
12511 && REG_P (operands[3])
12512 && GET_CODE (operands[7]) == CONST_INT
12513 && INTVAL (operands[7]) == INTVAL (operands[6]))
12514 operands[7] = operands[3];
12517 emit_move_insn (operands[2], operands[5]);
12518 emit_move_insn (operands[3], operands[6]);
12520 emit_move_insn (operands[4], operands[7]);
12525 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12526 left shift by a constant, either using a single shift or
12527 a sequence of add instructions. */
12530 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12534 emit_insn ((mode == DImode
12536 : gen_adddi3) (operand, operand, operand));
12538 else if (!optimize_size
12539 && count * ix86_cost->add <= ix86_cost->shift_const)
12542 for (i=0; i<count; i++)
12544 emit_insn ((mode == DImode
12546 : gen_adddi3) (operand, operand, operand));
12550 emit_insn ((mode == DImode
12552 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12556 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12558 rtx low[2], high[2];
12560 const int single_width = mode == DImode ? 32 : 64;
12562 if (GET_CODE (operands[2]) == CONST_INT)
12564 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12565 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12567 if (count >= single_width)
12569 emit_move_insn (high[0], low[1]);
12570 emit_move_insn (low[0], const0_rtx);
12572 if (count > single_width)
12573 ix86_expand_ashl_const (high[0], count - single_width, mode);
12577 if (!rtx_equal_p (operands[0], operands[1]))
12578 emit_move_insn (operands[0], operands[1]);
12579 emit_insn ((mode == DImode
12581 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12582 ix86_expand_ashl_const (low[0], count, mode);
12587 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12589 if (operands[1] == const1_rtx)
12591 /* Assuming we've chosen a QImode capable registers, then 1 << N
12592 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12593 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12595 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12597 ix86_expand_clear (low[0]);
12598 ix86_expand_clear (high[0]);
12599 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12601 d = gen_lowpart (QImode, low[0]);
12602 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12603 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12604 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12606 d = gen_lowpart (QImode, high[0]);
12607 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12608 s = gen_rtx_NE (QImode, flags, const0_rtx);
12609 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12612 /* Otherwise, we can get the same results by manually performing
12613 a bit extract operation on bit 5/6, and then performing the two
12614 shifts. The two methods of getting 0/1 into low/high are exactly
12615 the same size. Avoiding the shift in the bit extract case helps
12616 pentium4 a bit; no one else seems to care much either way. */
12621 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12622 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12624 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12625 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12627 emit_insn ((mode == DImode
12629 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12630 emit_insn ((mode == DImode
12632 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12633 emit_move_insn (low[0], high[0]);
12634 emit_insn ((mode == DImode
12636 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12639 emit_insn ((mode == DImode
12641 : gen_ashldi3) (low[0], low[0], operands[2]));
12642 emit_insn ((mode == DImode
12644 : gen_ashldi3) (high[0], high[0], operands[2]));
12648 if (operands[1] == constm1_rtx)
12650 /* For -1 << N, we can avoid the shld instruction, because we
12651 know that we're shifting 0...31/63 ones into a -1. */
12652 emit_move_insn (low[0], constm1_rtx);
12654 emit_move_insn (high[0], low[0]);
12656 emit_move_insn (high[0], constm1_rtx);
12660 if (!rtx_equal_p (operands[0], operands[1]))
12661 emit_move_insn (operands[0], operands[1]);
12663 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12664 emit_insn ((mode == DImode
12666 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12669 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12671 if (TARGET_CMOVE && scratch)
12673 ix86_expand_clear (scratch);
12674 emit_insn ((mode == DImode
12675 ? gen_x86_shift_adj_1
12676 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12679 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12683 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12685 rtx low[2], high[2];
12687 const int single_width = mode == DImode ? 32 : 64;
12689 if (GET_CODE (operands[2]) == CONST_INT)
12691 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12692 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12694 if (count == single_width * 2 - 1)
12696 emit_move_insn (high[0], high[1]);
12697 emit_insn ((mode == DImode
12699 : gen_ashrdi3) (high[0], high[0],
12700 GEN_INT (single_width - 1)));
12701 emit_move_insn (low[0], high[0]);
12704 else if (count >= single_width)
12706 emit_move_insn (low[0], high[1]);
12707 emit_move_insn (high[0], low[0]);
12708 emit_insn ((mode == DImode
12710 : gen_ashrdi3) (high[0], high[0],
12711 GEN_INT (single_width - 1)));
12712 if (count > single_width)
12713 emit_insn ((mode == DImode
12715 : gen_ashrdi3) (low[0], low[0],
12716 GEN_INT (count - single_width)));
12720 if (!rtx_equal_p (operands[0], operands[1]))
12721 emit_move_insn (operands[0], operands[1]);
12722 emit_insn ((mode == DImode
12724 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12725 emit_insn ((mode == DImode
12727 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12732 if (!rtx_equal_p (operands[0], operands[1]))
12733 emit_move_insn (operands[0], operands[1]);
12735 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12737 emit_insn ((mode == DImode
12739 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12740 emit_insn ((mode == DImode
12742 : gen_ashrdi3) (high[0], high[0], operands[2]));
12744 if (TARGET_CMOVE && scratch)
12746 emit_move_insn (scratch, high[0]);
12747 emit_insn ((mode == DImode
12749 : gen_ashrdi3) (scratch, scratch,
12750 GEN_INT (single_width - 1)));
12751 emit_insn ((mode == DImode
12752 ? gen_x86_shift_adj_1
12753 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12757 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12762 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12764 rtx low[2], high[2];
12766 const int single_width = mode == DImode ? 32 : 64;
12768 if (GET_CODE (operands[2]) == CONST_INT)
12770 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12771 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12773 if (count >= single_width)
12775 emit_move_insn (low[0], high[1]);
12776 ix86_expand_clear (high[0]);
12778 if (count > single_width)
12779 emit_insn ((mode == DImode
12781 : gen_lshrdi3) (low[0], low[0],
12782 GEN_INT (count - single_width)));
12786 if (!rtx_equal_p (operands[0], operands[1]))
12787 emit_move_insn (operands[0], operands[1]);
12788 emit_insn ((mode == DImode
12790 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12791 emit_insn ((mode == DImode
12793 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12798 if (!rtx_equal_p (operands[0], operands[1]))
12799 emit_move_insn (operands[0], operands[1]);
12801 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12803 emit_insn ((mode == DImode
12805 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12806 emit_insn ((mode == DImode
12808 : gen_lshrdi3) (high[0], high[0], operands[2]));
12810 /* Heh. By reversing the arguments, we can reuse this pattern. */
12811 if (TARGET_CMOVE && scratch)
12813 ix86_expand_clear (scratch);
12814 emit_insn ((mode == DImode
12815 ? gen_x86_shift_adj_1
12816 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12820 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12824 /* Predict just emitted jump instruction to be taken with probability PROB. */
12826 predict_jump (int prob)
12828 rtx insn = get_last_insn ();
12829 gcc_assert (GET_CODE (insn) == JUMP_INSN);
12831 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12836 /* Helper function for the string operations below. Dest VARIABLE whether
12837 it is aligned to VALUE bytes. If true, jump to the label. */
12839 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
12841 rtx label = gen_label_rtx ();
12842 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12843 if (GET_MODE (variable) == DImode)
12844 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12846 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12847 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12850 predict_jump (REG_BR_PROB_BASE * 50 / 100);
12852 predict_jump (REG_BR_PROB_BASE * 90 / 100);
12856 /* Adjust COUNTER by the VALUE. */
12858 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12860 if (GET_MODE (countreg) == DImode)
12861 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12863 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12866 /* Zero extend possibly SImode EXP to Pmode register. */
12868 ix86_zero_extend_to_Pmode (rtx exp)
12871 if (GET_MODE (exp) == VOIDmode)
12872 return force_reg (Pmode, exp);
12873 if (GET_MODE (exp) == Pmode)
12874 return copy_to_mode_reg (Pmode, exp);
12875 r = gen_reg_rtx (Pmode);
12876 emit_insn (gen_zero_extendsidi2 (r, exp));
12880 /* Divide COUNTREG by SCALE. */
12882 scale_counter (rtx countreg, int scale)
12885 rtx piece_size_mask;
12889 if (GET_CODE (countreg) == CONST_INT)
12890 return GEN_INT (INTVAL (countreg) / scale);
12891 gcc_assert (REG_P (countreg));
12893 piece_size_mask = GEN_INT (scale - 1);
12894 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
12895 GEN_INT (exact_log2 (scale)),
12896 NULL, 1, OPTAB_DIRECT);
12900 /* When SRCPTR is non-NULL, output simple loop to move memory
12901 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
12902 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
12903 equivalent loop to set memory by VALUE (supposed to be in MODE).
12905 The size is rounded down to whole number of chunk size moved at once.
12906 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
12910 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
12911 rtx destptr, rtx srcptr, rtx value,
12912 rtx count, enum machine_mode mode, int unroll,
12915 rtx out_label, top_label, iter, tmp;
12916 enum machine_mode iter_mode;
12917 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
12918 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
12924 iter_mode = GET_MODE (count);
12925 if (iter_mode == VOIDmode)
12926 iter_mode = word_mode;
12928 top_label = gen_label_rtx ();
12929 out_label = gen_label_rtx ();
12930 iter = gen_reg_rtx (iter_mode);
12932 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
12933 NULL, 1, OPTAB_DIRECT);
12934 /* Those two should combine. */
12935 if (piece_size == const1_rtx)
12937 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
12939 predict_jump (REG_BR_PROB_BASE * 10 / 100);
12941 emit_move_insn (iter, const0_rtx);
12943 emit_label (top_label);
12945 tmp = convert_modes (Pmode, iter_mode, iter, true);
12946 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
12947 destmem = change_address (destmem, mode, x_addr);
12951 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
12952 srcmem = change_address (srcmem, mode, y_addr);
12954 /* When unrolling for chips that reorder memory reads and writes,
12955 we can save registers by using single temporary.
12956 Also using 4 temporaries is overkill in 32bit mode. */
12957 if (!TARGET_64BIT && 0)
12959 for (i = 0; i < unroll; i++)
12964 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12966 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
12968 emit_move_insn (destmem, srcmem);
12974 gcc_assert (unroll <= 4);
12975 for (i = 0; i < unroll; i++)
12977 tmpreg[i] = gen_reg_rtx (mode);
12981 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
12983 emit_move_insn (tmpreg[i], srcmem);
12985 for (i = 0; i < unroll; i++)
12990 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12992 emit_move_insn (destmem, tmpreg[i]);
12997 for (i = 0; i < unroll; i++)
13001 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13002 emit_move_insn (destmem, value);
13005 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13006 true, OPTAB_LIB_WIDEN);
13008 emit_move_insn (iter, tmp);
13010 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13012 if (expected_size != -1)
13014 expected_size /= GET_MODE_SIZE (mode) * unroll;
13015 if (expected_size == 0)
13017 else if (expected_size > REG_BR_PROB_BASE)
13018 predict_jump (REG_BR_PROB_BASE - 1);
13020 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13023 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13024 iter = ix86_zero_extend_to_Pmode (iter);
13025 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13026 true, OPTAB_LIB_WIDEN);
13027 if (tmp != destptr)
13028 emit_move_insn (destptr, tmp);
13031 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13032 true, OPTAB_LIB_WIDEN);
13034 emit_move_insn (srcptr, tmp);
13036 emit_label (out_label);
13039 /* Output "rep; mov" instruction.
13040 Arguments have same meaning as for previous function */
13042 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13043 rtx destptr, rtx srcptr,
13045 enum machine_mode mode)
13051 /* If the size is known, it is shorter to use rep movs. */
13052 if (mode == QImode && GET_CODE (count) == CONST_INT
13053 && !(INTVAL (count) & 3))
13056 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13057 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13058 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13059 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13060 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13061 if (mode != QImode)
13063 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13064 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13065 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13066 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13067 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13068 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13072 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13073 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13075 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13079 /* Output "rep; stos" instruction.
13080 Arguments have same meaning as for previous function */
13082 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13084 enum machine_mode mode)
13089 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13090 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13091 value = force_reg (mode, gen_lowpart (mode, value));
13092 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13093 if (mode != QImode)
13095 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13096 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13097 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13100 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13101 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13105 emit_strmov (rtx destmem, rtx srcmem,
13106 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13108 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13109 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13110 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13113 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13115 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13116 rtx destptr, rtx srcptr, rtx count, int max_size)
13119 if (GET_CODE (count) == CONST_INT)
13121 HOST_WIDE_INT countval = INTVAL (count);
13124 if ((countval & 0x16) && max_size > 16)
13128 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13129 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13132 gcc_unreachable ();
13135 if ((countval & 0x08) && max_size > 8)
13138 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13141 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13142 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 4);
13146 if ((countval & 0x04) && max_size > 4)
13148 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13151 if ((countval & 0x02) && max_size > 2)
13153 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13156 if ((countval & 0x01) && max_size > 1)
13158 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13165 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13166 count, 1, OPTAB_DIRECT);
13167 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13168 count, QImode, 1, 4);
13172 /* When there are stringops, we can cheaply increase dest and src pointers.
13173 Otherwise we save code size by maintaining offset (zero is readily
13174 available from preceeding rep operation) and using x86 addressing modes.
13176 if (TARGET_SINGLE_STRINGOP)
13180 rtx label = ix86_expand_aligntest (count, 4, true);
13181 src = change_address (srcmem, SImode, srcptr);
13182 dest = change_address (destmem, SImode, destptr);
13183 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13184 emit_label (label);
13185 LABEL_NUSES (label) = 1;
13189 rtx label = ix86_expand_aligntest (count, 2, true);
13190 src = change_address (srcmem, HImode, srcptr);
13191 dest = change_address (destmem, HImode, destptr);
13192 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13193 emit_label (label);
13194 LABEL_NUSES (label) = 1;
13198 rtx label = ix86_expand_aligntest (count, 1, true);
13199 src = change_address (srcmem, QImode, srcptr);
13200 dest = change_address (destmem, QImode, destptr);
13201 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13202 emit_label (label);
13203 LABEL_NUSES (label) = 1;
13208 rtx offset = force_reg (Pmode, const0_rtx);
13213 rtx label = ix86_expand_aligntest (count, 4, true);
13214 src = change_address (srcmem, SImode, srcptr);
13215 dest = change_address (destmem, SImode, destptr);
13216 emit_move_insn (dest, src);
13217 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13218 true, OPTAB_LIB_WIDEN);
13220 emit_move_insn (offset, tmp);
13221 emit_label (label);
13222 LABEL_NUSES (label) = 1;
13226 rtx label = ix86_expand_aligntest (count, 2, true);
13227 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13228 src = change_address (srcmem, HImode, tmp);
13229 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13230 dest = change_address (destmem, HImode, tmp);
13231 emit_move_insn (dest, src);
13232 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13233 true, OPTAB_LIB_WIDEN);
13235 emit_move_insn (offset, tmp);
13236 emit_label (label);
13237 LABEL_NUSES (label) = 1;
13241 rtx label = ix86_expand_aligntest (count, 1, true);
13242 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13243 src = change_address (srcmem, QImode, tmp);
13244 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13245 dest = change_address (destmem, QImode, tmp);
13246 emit_move_insn (dest, src);
13247 emit_label (label);
13248 LABEL_NUSES (label) = 1;
13253 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13255 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13256 rtx count, int max_size)
13259 expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13260 count, 1, OPTAB_DIRECT);
13261 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13262 gen_lowpart (QImode, value), count, QImode,
13266 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13268 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13271 if (GET_CODE (count) == CONST_INT)
13273 HOST_WIDE_INT countval = INTVAL (count);
13276 if ((countval & 0x16) && max_size > 16)
13280 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13281 emit_insn (gen_strset (destptr, dest, value));
13282 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13283 emit_insn (gen_strset (destptr, dest, value));
13286 gcc_unreachable ();
13289 if ((countval & 0x08) && max_size > 8)
13293 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13294 emit_insn (gen_strset (destptr, dest, value));
13298 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13299 emit_insn (gen_strset (destptr, dest, value));
13300 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13301 emit_insn (gen_strset (destptr, dest, value));
13305 if ((countval & 0x04) && max_size > 4)
13307 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13308 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13311 if ((countval & 0x02) && max_size > 2)
13313 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13314 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13317 if ((countval & 0x01) && max_size > 1)
13319 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13320 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13327 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13332 rtx label = ix86_expand_aligntest (count, 16, true);
13335 dest = change_address (destmem, DImode, destptr);
13336 emit_insn (gen_strset (destptr, dest, value));
13337 emit_insn (gen_strset (destptr, dest, value));
13341 dest = change_address (destmem, SImode, destptr);
13342 emit_insn (gen_strset (destptr, dest, value));
13343 emit_insn (gen_strset (destptr, dest, value));
13344 emit_insn (gen_strset (destptr, dest, value));
13345 emit_insn (gen_strset (destptr, dest, value));
13347 emit_label (label);
13348 LABEL_NUSES (label) = 1;
13352 rtx label = ix86_expand_aligntest (count, 8, true);
13355 dest = change_address (destmem, DImode, destptr);
13356 emit_insn (gen_strset (destptr, dest, value));
13360 dest = change_address (destmem, SImode, destptr);
13361 emit_insn (gen_strset (destptr, dest, value));
13362 emit_insn (gen_strset (destptr, dest, value));
13364 emit_label (label);
13365 LABEL_NUSES (label) = 1;
13369 rtx label = ix86_expand_aligntest (count, 4, true);
13370 dest = change_address (destmem, SImode, destptr);
13371 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13372 emit_label (label);
13373 LABEL_NUSES (label) = 1;
13377 rtx label = ix86_expand_aligntest (count, 2, true);
13378 dest = change_address (destmem, HImode, destptr);
13379 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13380 emit_label (label);
13381 LABEL_NUSES (label) = 1;
13385 rtx label = ix86_expand_aligntest (count, 1, true);
13386 dest = change_address (destmem, QImode, destptr);
13387 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13388 emit_label (label);
13389 LABEL_NUSES (label) = 1;
13393 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
13394 DESIRED_ALIGNMENT. */
13396 expand_movmem_prologue (rtx destmem, rtx srcmem,
13397 rtx destptr, rtx srcptr, rtx count,
13398 int align, int desired_alignment)
13400 if (align <= 1 && desired_alignment > 1)
13402 rtx label = ix86_expand_aligntest (destptr, 1, false);
13403 srcmem = change_address (srcmem, QImode, srcptr);
13404 destmem = change_address (destmem, QImode, destptr);
13405 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13406 ix86_adjust_counter (count, 1);
13407 emit_label (label);
13408 LABEL_NUSES (label) = 1;
13410 if (align <= 2 && desired_alignment > 2)
13412 rtx label = ix86_expand_aligntest (destptr, 2, false);
13413 srcmem = change_address (srcmem, HImode, srcptr);
13414 destmem = change_address (destmem, HImode, destptr);
13415 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13416 ix86_adjust_counter (count, 2);
13417 emit_label (label);
13418 LABEL_NUSES (label) = 1;
13420 if (align <= 4 && desired_alignment > 4)
13422 rtx label = ix86_expand_aligntest (destptr, 4, false);
13423 srcmem = change_address (srcmem, SImode, srcptr);
13424 destmem = change_address (destmem, SImode, destptr);
13425 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13426 ix86_adjust_counter (count, 4);
13427 emit_label (label);
13428 LABEL_NUSES (label) = 1;
13430 gcc_assert (desired_alignment <= 8);
13433 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
13434 DESIRED_ALIGNMENT. */
13436 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13437 int align, int desired_alignment)
13439 if (align <= 1 && desired_alignment > 1)
13441 rtx label = ix86_expand_aligntest (destptr, 1, false);
13442 destmem = change_address (destmem, QImode, destptr);
13443 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13444 ix86_adjust_counter (count, 1);
13445 emit_label (label);
13446 LABEL_NUSES (label) = 1;
13448 if (align <= 2 && desired_alignment > 2)
13450 rtx label = ix86_expand_aligntest (destptr, 2, false);
13451 destmem = change_address (destmem, HImode, destptr);
13452 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13453 ix86_adjust_counter (count, 2);
13454 emit_label (label);
13455 LABEL_NUSES (label) = 1;
13457 if (align <= 4 && desired_alignment > 4)
13459 rtx label = ix86_expand_aligntest (destptr, 4, false);
13460 destmem = change_address (destmem, SImode, destptr);
13461 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13462 ix86_adjust_counter (count, 4);
13463 emit_label (label);
13464 LABEL_NUSES (label) = 1;
13466 gcc_assert (desired_alignment <= 8);
13469 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13470 static enum stringop_alg
13471 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13472 int *dynamic_check)
13474 const struct stringop_algs * algs;
13476 *dynamic_check = -1;
13478 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13480 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13481 if (stringop_alg != no_stringop)
13482 return stringop_alg;
13483 /* rep; movq or rep; movl is the smallest variant. */
13484 else if (optimize_size)
13486 if (!count || (count & 3))
13487 return rep_prefix_1_byte;
13489 return rep_prefix_4_byte;
13491 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
13493 else if (expected_size != -1 && expected_size < 4)
13494 return loop_1_byte;
13495 else if (expected_size != -1)
13498 enum stringop_alg alg = libcall;
13499 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13501 gcc_assert (algs->size[i].max);
13502 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13504 if (algs->size[i].alg != libcall)
13505 alg = algs->size[i].alg;
13506 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13507 last non-libcall inline algorithm. */
13508 if (TARGET_INLINE_ALL_STRINGOPS)
13510 gcc_assert (alg != libcall);
13514 return algs->size[i].alg;
13517 gcc_unreachable ();
13519 /* When asked to inline the call anyway, try to pick meaningful choice.
13520 We look for maximal size of block that is faster to copy by hand and
13521 take blocks of at most of that size guessing that average size will
13522 be roughly half of the block.
13524 If this turns out to be bad, we might simply specify the preferred
13525 choice in ix86_costs. */
13526 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13527 && algs->unknown_size == libcall)
13530 enum stringop_alg alg;
13533 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13534 if (algs->size[i].alg != libcall && algs->size[i].alg)
13535 max = algs->size[i].max;
13538 alg = decide_alg (count, max / 2, memset, dynamic_check);
13539 gcc_assert (*dynamic_check == -1);
13540 gcc_assert (alg != libcall);
13541 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13542 *dynamic_check = max;
13545 return algs->unknown_size;
13548 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13549 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13551 decide_alignment (int align,
13552 enum stringop_alg alg,
13555 int desired_align = 0;
13559 gcc_unreachable ();
13561 case unrolled_loop:
13562 desired_align = GET_MODE_SIZE (Pmode);
13564 case rep_prefix_8_byte:
13567 case rep_prefix_4_byte:
13568 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13569 copying whole cacheline at once. */
13570 if (TARGET_PENTIUMPRO)
13575 case rep_prefix_1_byte:
13576 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
13577 copying whole cacheline at once. */
13578 if (TARGET_PENTIUMPRO)
13592 if (desired_align < align)
13593 desired_align = align;
13594 if (expected_size != -1 && expected_size < 4)
13595 desired_align = align;
13596 return desired_align;
13599 /* Expand string move (memcpy) operation. Use i386 string operations when
13600 profitable. expand_clrmem contains similar code. */
13602 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
13603 rtx expected_align_exp, rtx expected_size_exp)
13609 rtx jump_around_label = NULL;
13610 HOST_WIDE_INT align = 1;
13611 unsigned HOST_WIDE_INT count = 0;
13612 HOST_WIDE_INT expected_size = -1;
13613 int size_needed = 0;
13614 int desired_align = 0;
13615 enum stringop_alg alg;
13617 /* Precise placement on cld depends whether stringops will be emit in
13618 prologue, main copying body or epilogue. This variable keeps track
13619 if cld was already needed. */
13620 bool cld_done = false;
13622 if (GET_CODE (align_exp) == CONST_INT)
13623 align = INTVAL (align_exp);
13624 /* i386 can do missaligned access on resonably increased cost. */
13625 if (GET_CODE (expected_align_exp) == CONST_INT
13626 && INTVAL (expected_align_exp) > align)
13627 align = INTVAL (expected_align_exp);
13628 if (GET_CODE (count_exp) == CONST_INT)
13629 count = expected_size = INTVAL (count_exp);
13630 if (GET_CODE (expected_size_exp) == CONST_INT && count == 0)
13632 expected_size = INTVAL (expected_size_exp);
13635 alg = decide_alg (count, expected_size, false, &dynamic_check);
13636 desired_align = decide_alignment (align, alg, expected_size);
13638 if (!TARGET_ALIGN_STRINGOPS)
13639 align = desired_align;
13641 if (alg == libcall)
13643 gcc_assert (alg != no_stringop);
13645 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
13646 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13647 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
13652 gcc_unreachable ();
13654 size_needed = GET_MODE_SIZE (Pmode);
13656 case unrolled_loop:
13657 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
13659 case rep_prefix_8_byte:
13662 case rep_prefix_4_byte:
13665 case rep_prefix_1_byte:
13671 /* Alignment code needs count to be in register. */
13672 if (GET_CODE (count_exp) == CONST_INT && desired_align > align)
13674 enum machine_mode mode = SImode;
13675 if (TARGET_64BIT && (count & ~0xffffffff))
13677 count_exp = force_reg (mode, count_exp);
13679 gcc_assert (desired_align >= 1 && align >= 1);
13680 /* Ensure that alignment prologue won't copy past end of block. */
13681 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13684 int size = MAX (size_needed - 1, desired_align - align);
13685 if (TARGET_SINGLE_STRINGOP)
13686 emit_insn (gen_cld ()), cld_done = true;
13687 label = gen_label_rtx ();
13688 emit_cmp_and_jump_insns (count_exp,
13690 LEU, 0, GET_MODE (count_exp), 1, label);
13691 if (expected_size == -1 || expected_size < size)
13692 predict_jump (REG_BR_PROB_BASE * 60 / 100);
13694 predict_jump (REG_BR_PROB_BASE * 20 / 100);
13696 /* Emit code to decide on runtime whether library call or inline should be
13698 if (dynamic_check != -1)
13700 rtx hot_label = gen_label_rtx ();
13701 jump_around_label = gen_label_rtx ();
13702 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
13703 LEU, 0, GET_MODE (count_exp), 1, hot_label);
13704 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13705 emit_block_move_via_libcall (dst, src, count_exp, false);
13706 emit_jump (jump_around_label);
13707 emit_label (hot_label);
13711 /* Alignment prologue. */
13712 if (desired_align > align)
13714 /* Except for the first move in epilogue, we no longer know
13715 constant offset in aliasing info. It don't seems to worth
13716 the pain to maintain it for the first move, so throw away
13718 src = change_address (src, BLKmode, srcreg);
13719 dst = change_address (dst, BLKmode, destreg);
13720 if (TARGET_SINGLE_STRINGOP && !cld_done)
13721 emit_insn (gen_cld ()), cld_done = true;
13722 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
13725 if (label && size_needed == 1)
13727 emit_label (label);
13728 LABEL_NUSES (label) = 1;
13737 gcc_unreachable ();
13739 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13740 count_exp, QImode, 1, expected_size);
13743 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13744 count_exp, Pmode, 1, expected_size);
13746 case unrolled_loop:
13747 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
13748 registers for 4 temporaries anyway. */
13749 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13750 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
13753 case rep_prefix_8_byte:
13755 emit_insn (gen_cld ()), cld_done = true;
13756 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13759 case rep_prefix_4_byte:
13761 emit_insn (gen_cld ()), cld_done = true;
13762 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13765 case rep_prefix_1_byte:
13767 emit_insn (gen_cld ()), cld_done = true;
13768 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13772 /* Adjust properly the offset of src and dest memory for aliasing. */
13773 if (GET_CODE (count_exp) == CONST_INT)
13775 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
13776 (count / size_needed) * size_needed);
13777 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
13778 (count / size_needed) * size_needed);
13782 src = change_address (src, BLKmode, srcreg);
13783 dst = change_address (dst, BLKmode, destreg);
13786 /* Epologue to copy the remaining bytes. */
13789 if (size_needed < desired_align - align)
13792 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
13793 GEN_INT (size_needed - 1), count_exp, 1,
13795 size_needed = desired_align - align + 1;
13796 if (tmp != count_exp)
13797 emit_move_insn (count_exp, tmp);
13799 emit_label (label);
13800 LABEL_NUSES (label) = 1;
13802 if (count_exp != const0_rtx && size_needed > 1)
13804 if (TARGET_SINGLE_STRINGOP && !cld_done)
13805 emit_insn (gen_cld ()), cld_done = true;
13806 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
13809 if (jump_around_label)
13810 emit_label (jump_around_label);
13814 /* Helper function for memcpy. For QImode value 0xXY produce
13815 0xXYXYXYXY of wide specified by MODE. This is essentially
13816 a * 0x10101010, but we can do slightly better than
13817 synth_mult by unwinding the sequence by hand on CPUs with
13820 promote_duplicated_reg (enum machine_mode mode, rtx val)
13822 enum machine_mode valmode = GET_MODE (val);
13824 int nops = mode == DImode ? 3 : 2;
13826 gcc_assert (mode == SImode || mode == DImode);
13827 if (val == const0_rtx)
13828 return copy_to_mode_reg (mode, const0_rtx);
13829 if (GET_CODE (val) == CONST_INT)
13831 HOST_WIDE_INT v = INTVAL (val) & 255;
13835 if (mode == DImode)
13836 v |= (v << 16) << 16;
13837 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
13840 if (valmode == VOIDmode)
13842 if (valmode != QImode)
13843 val = gen_lowpart (QImode, val);
13844 if (mode == QImode)
13846 if (!TARGET_PARTIAL_REG_STALL)
13848 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
13849 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
13850 <= (ix86_cost->shift_const + ix86_cost->add) * nops
13851 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
13853 rtx reg = convert_modes (mode, QImode, val, true);
13854 tmp = promote_duplicated_reg (mode, const1_rtx);
13855 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
13860 rtx reg = convert_modes (mode, QImode, val, true);
13862 if (!TARGET_PARTIAL_REG_STALL)
13863 if (mode == SImode)
13864 emit_insn (gen_movsi_insv_1 (reg, reg));
13866 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
13869 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
13870 NULL, 1, OPTAB_DIRECT);
13872 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13874 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
13875 NULL, 1, OPTAB_DIRECT);
13876 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13877 if (mode == SImode)
13879 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
13880 NULL, 1, OPTAB_DIRECT);
13881 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13886 /* Expand string clear operation (bzero). Use i386 string operations when
13887 profitable. expand_movmem contains similar code. */
13889 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
13890 rtx expected_align_exp, rtx expected_size_exp)
13895 rtx jump_around_label = NULL;
13896 HOST_WIDE_INT align = 1;
13897 unsigned HOST_WIDE_INT count = 0;
13898 HOST_WIDE_INT expected_size = -1;
13899 int size_needed = 0;
13900 int desired_align = 0;
13901 enum stringop_alg alg;
13902 /* Precise placement on cld depends whether stringops will be emit in
13903 prologue, main copying body or epilogue. This variable keeps track
13904 if cld was already needed. */
13905 bool cld_done = false;
13906 rtx promoted_val = val_exp;
13907 bool force_loopy_epilogue = false;
13910 if (GET_CODE (align_exp) == CONST_INT)
13911 align = INTVAL (align_exp);
13912 /* i386 can do missaligned access on resonably increased cost. */
13913 if (GET_CODE (expected_align_exp) == CONST_INT
13914 && INTVAL (expected_align_exp) > align)
13915 align = INTVAL (expected_align_exp);
13916 if (GET_CODE (count_exp) == CONST_INT)
13917 count = expected_size = INTVAL (count_exp);
13918 if (GET_CODE (expected_size_exp) == CONST_INT && count == 0)
13919 expected_size = INTVAL (expected_size_exp);
13921 alg = decide_alg (count, expected_size, true, &dynamic_check);
13922 desired_align = decide_alignment (align, alg, expected_size);
13924 if (!TARGET_ALIGN_STRINGOPS)
13925 align = desired_align;
13927 if (alg == libcall)
13929 gcc_assert (alg != no_stringop);
13931 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
13932 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13937 gcc_unreachable ();
13939 size_needed = GET_MODE_SIZE (Pmode);
13941 case unrolled_loop:
13942 size_needed = GET_MODE_SIZE (Pmode) * 4;
13944 case rep_prefix_8_byte:
13947 case rep_prefix_4_byte:
13950 case rep_prefix_1_byte:
13955 /* Alignment code needs count to be in register. */
13956 if (GET_CODE (count_exp) == CONST_INT && desired_align > align)
13958 enum machine_mode mode = SImode;
13959 if (TARGET_64BIT && (count & ~0xffffffff))
13961 count_exp = force_reg (mode, count_exp);
13963 /* Ensure that alignment prologue won't copy past end of block. */
13964 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13967 int size = MAX (size_needed - 1, desired_align - align);
13968 /* To improve performance of small blocks, we jump around the promoting
13969 code, so we need to use QImode accesses in epilogue. */
13970 if (GET_CODE (val_exp) != CONST_INT && size_needed > 1)
13971 force_loopy_epilogue = true;
13972 else if (TARGET_SINGLE_STRINGOP)
13973 emit_insn (gen_cld ()), cld_done = true;
13974 label = gen_label_rtx ();
13975 emit_cmp_and_jump_insns (count_exp,
13977 LEU, 0, GET_MODE (count_exp), 1, label);
13978 if (expected_size == -1 || expected_size <= size)
13979 predict_jump (REG_BR_PROB_BASE * 60 / 100);
13981 predict_jump (REG_BR_PROB_BASE * 20 / 100);
13983 if (dynamic_check != -1)
13985 rtx hot_label = gen_label_rtx ();
13986 jump_around_label = gen_label_rtx ();
13987 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
13988 LEU, 0, GET_MODE (count_exp), 1, hot_label);
13989 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13990 set_storage_via_libcall (dst, count_exp, val_exp, false);
13991 emit_jump (jump_around_label);
13992 emit_label (hot_label);
13995 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
13996 promoted_val = promote_duplicated_reg (DImode, val_exp);
13997 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
13998 promoted_val = promote_duplicated_reg (SImode, val_exp);
13999 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14000 promoted_val = promote_duplicated_reg (HImode, val_exp);
14002 promoted_val = val_exp;
14003 gcc_assert (desired_align >= 1 && align >= 1);
14004 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
14005 && !count && !label)
14007 int size = MAX (size_needed - 1, desired_align - align);
14008 if (TARGET_SINGLE_STRINGOP)
14009 emit_insn (gen_cld ()), cld_done = true;
14010 label = gen_label_rtx ();
14011 emit_cmp_and_jump_insns (count_exp,
14013 LEU, 0, GET_MODE (count_exp), 1, label);
14014 if (expected_size == -1 || expected_size <= size)
14015 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14017 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14019 if (desired_align > align)
14021 /* Except for the first move in epilogue, we no longer know
14022 constant offset in aliasing info. It don't seems to worth
14023 the pain to maintain it for the first move, so throw away
14025 dst = change_address (dst, BLKmode, destreg);
14026 if (TARGET_SINGLE_STRINGOP && !cld_done)
14027 emit_insn (gen_cld ()), cld_done = true;
14028 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14031 if (label && size_needed == 1)
14033 emit_label (label);
14034 LABEL_NUSES (label) = 1;
14041 gcc_unreachable ();
14043 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14044 count_exp, QImode, 1, expected_size);
14047 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14048 count_exp, Pmode, 1, expected_size);
14050 case unrolled_loop:
14051 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14052 count_exp, Pmode, 4, expected_size);
14054 case rep_prefix_8_byte:
14056 emit_insn (gen_cld ()), cld_done = true;
14057 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14060 case rep_prefix_4_byte:
14062 emit_insn (gen_cld ()), cld_done = true;
14063 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14066 case rep_prefix_1_byte:
14068 emit_insn (gen_cld ()), cld_done = true;
14069 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14073 /* Adjust properly the offset of src and dest memory for aliasing. */
14074 if (GET_CODE (count_exp) == CONST_INT)
14075 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14076 (count / size_needed) * size_needed);
14078 dst = change_address (dst, BLKmode, destreg);
14082 if (size_needed < desired_align - align)
14085 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14086 GEN_INT (size_needed - 1), count_exp, 1,
14088 size_needed = desired_align - align + 1;
14089 if (tmp != count_exp)
14090 emit_move_insn (count_exp, tmp);
14092 emit_label (label);
14093 LABEL_NUSES (label) = 1;
14095 if (count_exp != const0_rtx && size_needed > 1)
14097 if (force_loopy_epilogue)
14098 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14102 if (TARGET_SINGLE_STRINGOP && !cld_done)
14103 emit_insn (gen_cld ()), cld_done = true;
14104 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14108 if (jump_around_label)
14109 emit_label (jump_around_label);
14113 /* Expand strlen. */
14115 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14117 rtx addr, scratch1, scratch2, scratch3, scratch4;
14119 /* The generic case of strlen expander is long. Avoid it's
14120 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14122 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14123 && !TARGET_INLINE_ALL_STRINGOPS
14125 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
14128 addr = force_reg (Pmode, XEXP (src, 0));
14129 scratch1 = gen_reg_rtx (Pmode);
14131 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14134 /* Well it seems that some optimizer does not combine a call like
14135 foo(strlen(bar), strlen(bar));
14136 when the move and the subtraction is done here. It does calculate
14137 the length just once when these instructions are done inside of
14138 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14139 often used and I use one fewer register for the lifetime of
14140 output_strlen_unroll() this is better. */
14142 emit_move_insn (out, addr);
14144 ix86_expand_strlensi_unroll_1 (out, src, align);
14146 /* strlensi_unroll_1 returns the address of the zero at the end of
14147 the string, like memchr(), so compute the length by subtracting
14148 the start address. */
14150 emit_insn (gen_subdi3 (out, out, addr));
14152 emit_insn (gen_subsi3 (out, out, addr));
14157 scratch2 = gen_reg_rtx (Pmode);
14158 scratch3 = gen_reg_rtx (Pmode);
14159 scratch4 = force_reg (Pmode, constm1_rtx);
14161 emit_move_insn (scratch3, addr);
14162 eoschar = force_reg (QImode, eoschar);
14164 emit_insn (gen_cld ());
14165 src = replace_equiv_address_nv (src, scratch3);
14167 /* If .md starts supporting :P, this can be done in .md. */
14168 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14169 scratch4), UNSPEC_SCAS);
14170 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14173 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14174 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14178 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14179 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14185 /* Expand the appropriate insns for doing strlen if not just doing
14188 out = result, initialized with the start address
14189 align_rtx = alignment of the address.
14190 scratch = scratch register, initialized with the startaddress when
14191 not aligned, otherwise undefined
14193 This is just the body. It needs the initializations mentioned above and
14194 some address computing at the end. These things are done in i386.md. */
14197 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14201 rtx align_2_label = NULL_RTX;
14202 rtx align_3_label = NULL_RTX;
14203 rtx align_4_label = gen_label_rtx ();
14204 rtx end_0_label = gen_label_rtx ();
14206 rtx tmpreg = gen_reg_rtx (SImode);
14207 rtx scratch = gen_reg_rtx (SImode);
14211 if (GET_CODE (align_rtx) == CONST_INT)
14212 align = INTVAL (align_rtx);
14214 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14216 /* Is there a known alignment and is it less than 4? */
14219 rtx scratch1 = gen_reg_rtx (Pmode);
14220 emit_move_insn (scratch1, out);
14221 /* Is there a known alignment and is it not 2? */
14224 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14225 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14227 /* Leave just the 3 lower bits. */
14228 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14229 NULL_RTX, 0, OPTAB_WIDEN);
14231 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14232 Pmode, 1, align_4_label);
14233 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14234 Pmode, 1, align_2_label);
14235 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14236 Pmode, 1, align_3_label);
14240 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14241 check if is aligned to 4 - byte. */
14243 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14244 NULL_RTX, 0, OPTAB_WIDEN);
14246 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14247 Pmode, 1, align_4_label);
14250 mem = change_address (src, QImode, out);
14252 /* Now compare the bytes. */
14254 /* Compare the first n unaligned byte on a byte per byte basis. */
14255 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14256 QImode, 1, end_0_label);
14258 /* Increment the address. */
14260 emit_insn (gen_adddi3 (out, out, const1_rtx));
14262 emit_insn (gen_addsi3 (out, out, const1_rtx));
14264 /* Not needed with an alignment of 2 */
14267 emit_label (align_2_label);
14269 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14273 emit_insn (gen_adddi3 (out, out, const1_rtx));
14275 emit_insn (gen_addsi3 (out, out, const1_rtx));
14277 emit_label (align_3_label);
14280 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14284 emit_insn (gen_adddi3 (out, out, const1_rtx));
14286 emit_insn (gen_addsi3 (out, out, const1_rtx));
14289 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14290 align this loop. It gives only huge programs, but does not help to
14292 emit_label (align_4_label);
14294 mem = change_address (src, SImode, out);
14295 emit_move_insn (scratch, mem);
14297 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14299 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14301 /* This formula yields a nonzero result iff one of the bytes is zero.
14302 This saves three branches inside loop and many cycles. */
14304 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14305 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14306 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14307 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14308 gen_int_mode (0x80808080, SImode)));
14309 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14314 rtx reg = gen_reg_rtx (SImode);
14315 rtx reg2 = gen_reg_rtx (Pmode);
14316 emit_move_insn (reg, tmpreg);
14317 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14319 /* If zero is not in the first two bytes, move two bytes forward. */
14320 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14321 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14322 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14323 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14324 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14327 /* Emit lea manually to avoid clobbering of flags. */
14328 emit_insn (gen_rtx_SET (SImode, reg2,
14329 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14331 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14332 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14333 emit_insn (gen_rtx_SET (VOIDmode, out,
14334 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14341 rtx end_2_label = gen_label_rtx ();
14342 /* Is zero in the first two bytes? */
14344 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14345 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14346 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14347 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14348 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14350 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14351 JUMP_LABEL (tmp) = end_2_label;
14353 /* Not in the first two. Move two bytes forward. */
14354 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14356 emit_insn (gen_adddi3 (out, out, const2_rtx));
14358 emit_insn (gen_addsi3 (out, out, const2_rtx));
14360 emit_label (end_2_label);
14364 /* Avoid branch in fixing the byte. */
14365 tmpreg = gen_lowpart (QImode, tmpreg);
14366 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14367 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14369 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14371 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14373 emit_label (end_0_label);
14377 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14378 rtx callarg2 ATTRIBUTE_UNUSED,
14379 rtx pop, int sibcall)
14381 rtx use = NULL, call;
14383 if (pop == const0_rtx)
14385 gcc_assert (!TARGET_64BIT || !pop);
14387 if (TARGET_MACHO && !TARGET_64BIT)
14390 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14391 fnaddr = machopic_indirect_call_target (fnaddr);
14396 /* Static functions and indirect calls don't need the pic register. */
14397 if (! TARGET_64BIT && flag_pic
14398 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14399 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14400 use_reg (&use, pic_offset_table_rtx);
14403 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14405 rtx al = gen_rtx_REG (QImode, 0);
14406 emit_move_insn (al, callarg2);
14407 use_reg (&use, al);
14410 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14412 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14413 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14415 if (sibcall && TARGET_64BIT
14416 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14419 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14420 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
14421 emit_move_insn (fnaddr, addr);
14422 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14425 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14427 call = gen_rtx_SET (VOIDmode, retval, call);
14430 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14431 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14432 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14435 call = emit_call_insn (call);
14437 CALL_INSN_FUNCTION_USAGE (call) = use;
14441 /* Clear stack slot assignments remembered from previous functions.
14442 This is called from INIT_EXPANDERS once before RTL is emitted for each
14445 static struct machine_function *
14446 ix86_init_machine_status (void)
14448 struct machine_function *f;
14450 f = ggc_alloc_cleared (sizeof (struct machine_function));
14451 f->use_fast_prologue_epilogue_nregs = -1;
14452 f->tls_descriptor_call_expanded_p = 0;
14457 /* Return a MEM corresponding to a stack slot with mode MODE.
14458 Allocate a new slot if necessary.
14460 The RTL for a function can have several slots available: N is
14461 which slot to use. */
14464 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14466 struct stack_local_entry *s;
14468 gcc_assert (n < MAX_386_STACK_LOCALS);
14470 for (s = ix86_stack_locals; s; s = s->next)
14471 if (s->mode == mode && s->n == n)
14472 return copy_rtx (s->rtl);
14474 s = (struct stack_local_entry *)
14475 ggc_alloc (sizeof (struct stack_local_entry));
14478 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14480 s->next = ix86_stack_locals;
14481 ix86_stack_locals = s;
14485 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14487 static GTY(()) rtx ix86_tls_symbol;
14489 ix86_tls_get_addr (void)
14492 if (!ix86_tls_symbol)
14494 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14495 (TARGET_ANY_GNU_TLS
14497 ? "___tls_get_addr"
14498 : "__tls_get_addr");
14501 return ix86_tls_symbol;
14504 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14506 static GTY(()) rtx ix86_tls_module_base_symbol;
14508 ix86_tls_module_base (void)
14511 if (!ix86_tls_module_base_symbol)
14513 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14514 "_TLS_MODULE_BASE_");
14515 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14516 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14519 return ix86_tls_module_base_symbol;
14522 /* Calculate the length of the memory address in the instruction
14523 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14526 memory_address_length (rtx addr)
14528 struct ix86_address parts;
14529 rtx base, index, disp;
14533 if (GET_CODE (addr) == PRE_DEC
14534 || GET_CODE (addr) == POST_INC
14535 || GET_CODE (addr) == PRE_MODIFY
14536 || GET_CODE (addr) == POST_MODIFY)
14539 ok = ix86_decompose_address (addr, &parts);
14542 if (parts.base && GET_CODE (parts.base) == SUBREG)
14543 parts.base = SUBREG_REG (parts.base);
14544 if (parts.index && GET_CODE (parts.index) == SUBREG)
14545 parts.index = SUBREG_REG (parts.index);
14548 index = parts.index;
14553 - esp as the base always wants an index,
14554 - ebp as the base always wants a displacement. */
14556 /* Register Indirect. */
14557 if (base && !index && !disp)
14559 /* esp (for its index) and ebp (for its displacement) need
14560 the two-byte modrm form. */
14561 if (addr == stack_pointer_rtx
14562 || addr == arg_pointer_rtx
14563 || addr == frame_pointer_rtx
14564 || addr == hard_frame_pointer_rtx)
14568 /* Direct Addressing. */
14569 else if (disp && !base && !index)
14574 /* Find the length of the displacement constant. */
14577 if (base && satisfies_constraint_K (disp))
14582 /* ebp always wants a displacement. */
14583 else if (base == hard_frame_pointer_rtx)
14586 /* An index requires the two-byte modrm form.... */
14588 /* ...like esp, which always wants an index. */
14589 || base == stack_pointer_rtx
14590 || base == arg_pointer_rtx
14591 || base == frame_pointer_rtx)
14598 /* Compute default value for "length_immediate" attribute. When SHORTFORM
14599 is set, expect that insn have 8bit immediate alternative. */
14601 ix86_attr_length_immediate_default (rtx insn, int shortform)
14605 extract_insn_cached (insn);
14606 for (i = recog_data.n_operands - 1; i >= 0; --i)
14607 if (CONSTANT_P (recog_data.operand[i]))
14610 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
14614 switch (get_attr_mode (insn))
14625 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
14630 fatal_insn ("unknown insn mode", insn);
14636 /* Compute default value for "length_address" attribute. */
14638 ix86_attr_length_address_default (rtx insn)
14642 if (get_attr_type (insn) == TYPE_LEA)
14644 rtx set = PATTERN (insn);
14646 if (GET_CODE (set) == PARALLEL)
14647 set = XVECEXP (set, 0, 0);
14649 gcc_assert (GET_CODE (set) == SET);
14651 return memory_address_length (SET_SRC (set));
14654 extract_insn_cached (insn);
14655 for (i = recog_data.n_operands - 1; i >= 0; --i)
14656 if (GET_CODE (recog_data.operand[i]) == MEM)
14658 return memory_address_length (XEXP (recog_data.operand[i], 0));
14664 /* Return the maximum number of instructions a cpu can issue. */
14667 ix86_issue_rate (void)
14671 case PROCESSOR_PENTIUM:
14675 case PROCESSOR_PENTIUMPRO:
14676 case PROCESSOR_PENTIUM4:
14677 case PROCESSOR_ATHLON:
14679 case PROCESSOR_NOCONA:
14680 case PROCESSOR_GENERIC32:
14681 case PROCESSOR_GENERIC64:
14684 case PROCESSOR_CORE2:
14692 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14693 by DEP_INSN and nothing set by DEP_INSN. */
14696 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14700 /* Simplify the test for uninteresting insns. */
14701 if (insn_type != TYPE_SETCC
14702 && insn_type != TYPE_ICMOV
14703 && insn_type != TYPE_FCMOV
14704 && insn_type != TYPE_IBR)
14707 if ((set = single_set (dep_insn)) != 0)
14709 set = SET_DEST (set);
14712 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14713 && XVECLEN (PATTERN (dep_insn), 0) == 2
14714 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14715 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14717 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14718 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14723 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14726 /* This test is true if the dependent insn reads the flags but
14727 not any other potentially set register. */
14728 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14731 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14737 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14738 address with operands set by DEP_INSN. */
14741 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14745 if (insn_type == TYPE_LEA
14748 addr = PATTERN (insn);
14750 if (GET_CODE (addr) == PARALLEL)
14751 addr = XVECEXP (addr, 0, 0);
14753 gcc_assert (GET_CODE (addr) == SET);
14755 addr = SET_SRC (addr);
14760 extract_insn_cached (insn);
14761 for (i = recog_data.n_operands - 1; i >= 0; --i)
14762 if (GET_CODE (recog_data.operand[i]) == MEM)
14764 addr = XEXP (recog_data.operand[i], 0);
14771 return modified_in_p (addr, dep_insn);
14775 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14777 enum attr_type insn_type, dep_insn_type;
14778 enum attr_memory memory;
14780 int dep_insn_code_number;
14782 /* Anti and output dependencies have zero cost on all CPUs. */
14783 if (REG_NOTE_KIND (link) != 0)
14786 dep_insn_code_number = recog_memoized (dep_insn);
14788 /* If we can't recognize the insns, we can't really do anything. */
14789 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14792 insn_type = get_attr_type (insn);
14793 dep_insn_type = get_attr_type (dep_insn);
14797 case PROCESSOR_PENTIUM:
14798 /* Address Generation Interlock adds a cycle of latency. */
14799 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14802 /* ??? Compares pair with jump/setcc. */
14803 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14806 /* Floating point stores require value to be ready one cycle earlier. */
14807 if (insn_type == TYPE_FMOV
14808 && get_attr_memory (insn) == MEMORY_STORE
14809 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14813 case PROCESSOR_PENTIUMPRO:
14814 memory = get_attr_memory (insn);
14816 /* INT->FP conversion is expensive. */
14817 if (get_attr_fp_int_src (dep_insn))
14820 /* There is one cycle extra latency between an FP op and a store. */
14821 if (insn_type == TYPE_FMOV
14822 && (set = single_set (dep_insn)) != NULL_RTX
14823 && (set2 = single_set (insn)) != NULL_RTX
14824 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14825 && GET_CODE (SET_DEST (set2)) == MEM)
14828 /* Show ability of reorder buffer to hide latency of load by executing
14829 in parallel with previous instruction in case
14830 previous instruction is not needed to compute the address. */
14831 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14832 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14834 /* Claim moves to take one cycle, as core can issue one load
14835 at time and the next load can start cycle later. */
14836 if (dep_insn_type == TYPE_IMOV
14837 || dep_insn_type == TYPE_FMOV)
14845 memory = get_attr_memory (insn);
14847 /* The esp dependency is resolved before the instruction is really
14849 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14850 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14853 /* INT->FP conversion is expensive. */
14854 if (get_attr_fp_int_src (dep_insn))
14857 /* Show ability of reorder buffer to hide latency of load by executing
14858 in parallel with previous instruction in case
14859 previous instruction is not needed to compute the address. */
14860 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14861 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14863 /* Claim moves to take one cycle, as core can issue one load
14864 at time and the next load can start cycle later. */
14865 if (dep_insn_type == TYPE_IMOV
14866 || dep_insn_type == TYPE_FMOV)
14875 case PROCESSOR_ATHLON:
14877 case PROCESSOR_GENERIC32:
14878 case PROCESSOR_GENERIC64:
14879 memory = get_attr_memory (insn);
14881 /* Show ability of reorder buffer to hide latency of load by executing
14882 in parallel with previous instruction in case
14883 previous instruction is not needed to compute the address. */
14884 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14885 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14887 enum attr_unit unit = get_attr_unit (insn);
14890 /* Because of the difference between the length of integer and
14891 floating unit pipeline preparation stages, the memory operands
14892 for floating point are cheaper.
14894 ??? For Athlon it the difference is most probably 2. */
14895 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14898 loadcost = TARGET_ATHLON ? 2 : 0;
14900 if (cost >= loadcost)
14913 /* How many alternative schedules to try. This should be as wide as the
14914 scheduling freedom in the DFA, but no wider. Making this value too
14915 large results extra work for the scheduler. */
14918 ia32_multipass_dfa_lookahead (void)
14920 if (ix86_tune == PROCESSOR_PENTIUM)
14923 if (ix86_tune == PROCESSOR_PENTIUMPRO
14924 || ix86_tune == PROCESSOR_K6)
14932 /* Compute the alignment given to a constant that is being placed in memory.
14933 EXP is the constant and ALIGN is the alignment that the object would
14935 The value of this function is used instead of that alignment to align
14939 ix86_constant_alignment (tree exp, int align)
14941 if (TREE_CODE (exp) == REAL_CST)
14943 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14945 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14948 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14949 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14950 return BITS_PER_WORD;
14955 /* Compute the alignment for a static variable.
14956 TYPE is the data type, and ALIGN is the alignment that
14957 the object would ordinarily have. The value of this function is used
14958 instead of that alignment to align the object. */
14961 ix86_data_alignment (tree type, int align)
14963 int max_align = optimize_size ? BITS_PER_WORD : 256;
14965 if (AGGREGATE_TYPE_P (type)
14966 && TYPE_SIZE (type)
14967 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14968 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14969 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14970 && align < max_align)
14973 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14974 to 16byte boundary. */
14977 if (AGGREGATE_TYPE_P (type)
14978 && TYPE_SIZE (type)
14979 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14980 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14981 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14985 if (TREE_CODE (type) == ARRAY_TYPE)
14987 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14989 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14992 else if (TREE_CODE (type) == COMPLEX_TYPE)
14995 if (TYPE_MODE (type) == DCmode && align < 64)
14997 if (TYPE_MODE (type) == XCmode && align < 128)
15000 else if ((TREE_CODE (type) == RECORD_TYPE
15001 || TREE_CODE (type) == UNION_TYPE
15002 || TREE_CODE (type) == QUAL_UNION_TYPE)
15003 && TYPE_FIELDS (type))
15005 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15007 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15010 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15011 || TREE_CODE (type) == INTEGER_TYPE)
15013 if (TYPE_MODE (type) == DFmode && align < 64)
15015 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15022 /* Compute the alignment for a local variable.
15023 TYPE is the data type, and ALIGN is the alignment that
15024 the object would ordinarily have. The value of this macro is used
15025 instead of that alignment to align the object. */
15028 ix86_local_alignment (tree type, int align)
15030 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15031 to 16byte boundary. */
15034 if (AGGREGATE_TYPE_P (type)
15035 && TYPE_SIZE (type)
15036 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15037 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15038 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15041 if (TREE_CODE (type) == ARRAY_TYPE)
15043 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15045 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15048 else if (TREE_CODE (type) == COMPLEX_TYPE)
15050 if (TYPE_MODE (type) == DCmode && align < 64)
15052 if (TYPE_MODE (type) == XCmode && align < 128)
15055 else if ((TREE_CODE (type) == RECORD_TYPE
15056 || TREE_CODE (type) == UNION_TYPE
15057 || TREE_CODE (type) == QUAL_UNION_TYPE)
15058 && TYPE_FIELDS (type))
15060 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15062 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15065 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15066 || TREE_CODE (type) == INTEGER_TYPE)
15069 if (TYPE_MODE (type) == DFmode && align < 64)
15071 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15077 /* Emit RTL insns to initialize the variable parts of a trampoline.
15078 FNADDR is an RTX for the address of the function's pure code.
15079 CXT is an RTX for the static chain value for the function. */
15081 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15085 /* Compute offset from the end of the jmp to the target function. */
15086 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15087 plus_constant (tramp, 10),
15088 NULL_RTX, 1, OPTAB_DIRECT);
15089 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15090 gen_int_mode (0xb9, QImode));
15091 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15092 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15093 gen_int_mode (0xe9, QImode));
15094 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15099 /* Try to load address using shorter movl instead of movabs.
15100 We may want to support movq for kernel mode, but kernel does not use
15101 trampolines at the moment. */
15102 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15104 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15105 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15106 gen_int_mode (0xbb41, HImode));
15107 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15108 gen_lowpart (SImode, fnaddr));
15113 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15114 gen_int_mode (0xbb49, HImode));
15115 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15119 /* Load static chain using movabs to r10. */
15120 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15121 gen_int_mode (0xba49, HImode));
15122 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15125 /* Jump to the r11 */
15126 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15127 gen_int_mode (0xff49, HImode));
15128 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15129 gen_int_mode (0xe3, QImode));
15131 gcc_assert (offset <= TRAMPOLINE_SIZE);
15134 #ifdef ENABLE_EXECUTE_STACK
15135 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15136 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15140 /* Codes for all the SSE/MMX builtins. */
15143 IX86_BUILTIN_ADDPS,
15144 IX86_BUILTIN_ADDSS,
15145 IX86_BUILTIN_DIVPS,
15146 IX86_BUILTIN_DIVSS,
15147 IX86_BUILTIN_MULPS,
15148 IX86_BUILTIN_MULSS,
15149 IX86_BUILTIN_SUBPS,
15150 IX86_BUILTIN_SUBSS,
15152 IX86_BUILTIN_CMPEQPS,
15153 IX86_BUILTIN_CMPLTPS,
15154 IX86_BUILTIN_CMPLEPS,
15155 IX86_BUILTIN_CMPGTPS,
15156 IX86_BUILTIN_CMPGEPS,
15157 IX86_BUILTIN_CMPNEQPS,
15158 IX86_BUILTIN_CMPNLTPS,
15159 IX86_BUILTIN_CMPNLEPS,
15160 IX86_BUILTIN_CMPNGTPS,
15161 IX86_BUILTIN_CMPNGEPS,
15162 IX86_BUILTIN_CMPORDPS,
15163 IX86_BUILTIN_CMPUNORDPS,
15164 IX86_BUILTIN_CMPEQSS,
15165 IX86_BUILTIN_CMPLTSS,
15166 IX86_BUILTIN_CMPLESS,
15167 IX86_BUILTIN_CMPNEQSS,
15168 IX86_BUILTIN_CMPNLTSS,
15169 IX86_BUILTIN_CMPNLESS,
15170 IX86_BUILTIN_CMPNGTSS,
15171 IX86_BUILTIN_CMPNGESS,
15172 IX86_BUILTIN_CMPORDSS,
15173 IX86_BUILTIN_CMPUNORDSS,
15175 IX86_BUILTIN_COMIEQSS,
15176 IX86_BUILTIN_COMILTSS,
15177 IX86_BUILTIN_COMILESS,
15178 IX86_BUILTIN_COMIGTSS,
15179 IX86_BUILTIN_COMIGESS,
15180 IX86_BUILTIN_COMINEQSS,
15181 IX86_BUILTIN_UCOMIEQSS,
15182 IX86_BUILTIN_UCOMILTSS,
15183 IX86_BUILTIN_UCOMILESS,
15184 IX86_BUILTIN_UCOMIGTSS,
15185 IX86_BUILTIN_UCOMIGESS,
15186 IX86_BUILTIN_UCOMINEQSS,
15188 IX86_BUILTIN_CVTPI2PS,
15189 IX86_BUILTIN_CVTPS2PI,
15190 IX86_BUILTIN_CVTSI2SS,
15191 IX86_BUILTIN_CVTSI642SS,
15192 IX86_BUILTIN_CVTSS2SI,
15193 IX86_BUILTIN_CVTSS2SI64,
15194 IX86_BUILTIN_CVTTPS2PI,
15195 IX86_BUILTIN_CVTTSS2SI,
15196 IX86_BUILTIN_CVTTSS2SI64,
15198 IX86_BUILTIN_MAXPS,
15199 IX86_BUILTIN_MAXSS,
15200 IX86_BUILTIN_MINPS,
15201 IX86_BUILTIN_MINSS,
15203 IX86_BUILTIN_LOADUPS,
15204 IX86_BUILTIN_STOREUPS,
15205 IX86_BUILTIN_MOVSS,
15207 IX86_BUILTIN_MOVHLPS,
15208 IX86_BUILTIN_MOVLHPS,
15209 IX86_BUILTIN_LOADHPS,
15210 IX86_BUILTIN_LOADLPS,
15211 IX86_BUILTIN_STOREHPS,
15212 IX86_BUILTIN_STORELPS,
15214 IX86_BUILTIN_MASKMOVQ,
15215 IX86_BUILTIN_MOVMSKPS,
15216 IX86_BUILTIN_PMOVMSKB,
15218 IX86_BUILTIN_MOVNTPS,
15219 IX86_BUILTIN_MOVNTQ,
15221 IX86_BUILTIN_LOADDQU,
15222 IX86_BUILTIN_STOREDQU,
15224 IX86_BUILTIN_PACKSSWB,
15225 IX86_BUILTIN_PACKSSDW,
15226 IX86_BUILTIN_PACKUSWB,
15228 IX86_BUILTIN_PADDB,
15229 IX86_BUILTIN_PADDW,
15230 IX86_BUILTIN_PADDD,
15231 IX86_BUILTIN_PADDQ,
15232 IX86_BUILTIN_PADDSB,
15233 IX86_BUILTIN_PADDSW,
15234 IX86_BUILTIN_PADDUSB,
15235 IX86_BUILTIN_PADDUSW,
15236 IX86_BUILTIN_PSUBB,
15237 IX86_BUILTIN_PSUBW,
15238 IX86_BUILTIN_PSUBD,
15239 IX86_BUILTIN_PSUBQ,
15240 IX86_BUILTIN_PSUBSB,
15241 IX86_BUILTIN_PSUBSW,
15242 IX86_BUILTIN_PSUBUSB,
15243 IX86_BUILTIN_PSUBUSW,
15246 IX86_BUILTIN_PANDN,
15250 IX86_BUILTIN_PAVGB,
15251 IX86_BUILTIN_PAVGW,
15253 IX86_BUILTIN_PCMPEQB,
15254 IX86_BUILTIN_PCMPEQW,
15255 IX86_BUILTIN_PCMPEQD,
15256 IX86_BUILTIN_PCMPGTB,
15257 IX86_BUILTIN_PCMPGTW,
15258 IX86_BUILTIN_PCMPGTD,
15260 IX86_BUILTIN_PMADDWD,
15262 IX86_BUILTIN_PMAXSW,
15263 IX86_BUILTIN_PMAXUB,
15264 IX86_BUILTIN_PMINSW,
15265 IX86_BUILTIN_PMINUB,
15267 IX86_BUILTIN_PMULHUW,
15268 IX86_BUILTIN_PMULHW,
15269 IX86_BUILTIN_PMULLW,
15271 IX86_BUILTIN_PSADBW,
15272 IX86_BUILTIN_PSHUFW,
15274 IX86_BUILTIN_PSLLW,
15275 IX86_BUILTIN_PSLLD,
15276 IX86_BUILTIN_PSLLQ,
15277 IX86_BUILTIN_PSRAW,
15278 IX86_BUILTIN_PSRAD,
15279 IX86_BUILTIN_PSRLW,
15280 IX86_BUILTIN_PSRLD,
15281 IX86_BUILTIN_PSRLQ,
15282 IX86_BUILTIN_PSLLWI,
15283 IX86_BUILTIN_PSLLDI,
15284 IX86_BUILTIN_PSLLQI,
15285 IX86_BUILTIN_PSRAWI,
15286 IX86_BUILTIN_PSRADI,
15287 IX86_BUILTIN_PSRLWI,
15288 IX86_BUILTIN_PSRLDI,
15289 IX86_BUILTIN_PSRLQI,
15291 IX86_BUILTIN_PUNPCKHBW,
15292 IX86_BUILTIN_PUNPCKHWD,
15293 IX86_BUILTIN_PUNPCKHDQ,
15294 IX86_BUILTIN_PUNPCKLBW,
15295 IX86_BUILTIN_PUNPCKLWD,
15296 IX86_BUILTIN_PUNPCKLDQ,
15298 IX86_BUILTIN_SHUFPS,
15300 IX86_BUILTIN_RCPPS,
15301 IX86_BUILTIN_RCPSS,
15302 IX86_BUILTIN_RSQRTPS,
15303 IX86_BUILTIN_RSQRTSS,
15304 IX86_BUILTIN_SQRTPS,
15305 IX86_BUILTIN_SQRTSS,
15307 IX86_BUILTIN_UNPCKHPS,
15308 IX86_BUILTIN_UNPCKLPS,
15310 IX86_BUILTIN_ANDPS,
15311 IX86_BUILTIN_ANDNPS,
15313 IX86_BUILTIN_XORPS,
15316 IX86_BUILTIN_LDMXCSR,
15317 IX86_BUILTIN_STMXCSR,
15318 IX86_BUILTIN_SFENCE,
15320 /* 3DNow! Original */
15321 IX86_BUILTIN_FEMMS,
15322 IX86_BUILTIN_PAVGUSB,
15323 IX86_BUILTIN_PF2ID,
15324 IX86_BUILTIN_PFACC,
15325 IX86_BUILTIN_PFADD,
15326 IX86_BUILTIN_PFCMPEQ,
15327 IX86_BUILTIN_PFCMPGE,
15328 IX86_BUILTIN_PFCMPGT,
15329 IX86_BUILTIN_PFMAX,
15330 IX86_BUILTIN_PFMIN,
15331 IX86_BUILTIN_PFMUL,
15332 IX86_BUILTIN_PFRCP,
15333 IX86_BUILTIN_PFRCPIT1,
15334 IX86_BUILTIN_PFRCPIT2,
15335 IX86_BUILTIN_PFRSQIT1,
15336 IX86_BUILTIN_PFRSQRT,
15337 IX86_BUILTIN_PFSUB,
15338 IX86_BUILTIN_PFSUBR,
15339 IX86_BUILTIN_PI2FD,
15340 IX86_BUILTIN_PMULHRW,
15342 /* 3DNow! Athlon Extensions */
15343 IX86_BUILTIN_PF2IW,
15344 IX86_BUILTIN_PFNACC,
15345 IX86_BUILTIN_PFPNACC,
15346 IX86_BUILTIN_PI2FW,
15347 IX86_BUILTIN_PSWAPDSI,
15348 IX86_BUILTIN_PSWAPDSF,
15351 IX86_BUILTIN_ADDPD,
15352 IX86_BUILTIN_ADDSD,
15353 IX86_BUILTIN_DIVPD,
15354 IX86_BUILTIN_DIVSD,
15355 IX86_BUILTIN_MULPD,
15356 IX86_BUILTIN_MULSD,
15357 IX86_BUILTIN_SUBPD,
15358 IX86_BUILTIN_SUBSD,
15360 IX86_BUILTIN_CMPEQPD,
15361 IX86_BUILTIN_CMPLTPD,
15362 IX86_BUILTIN_CMPLEPD,
15363 IX86_BUILTIN_CMPGTPD,
15364 IX86_BUILTIN_CMPGEPD,
15365 IX86_BUILTIN_CMPNEQPD,
15366 IX86_BUILTIN_CMPNLTPD,
15367 IX86_BUILTIN_CMPNLEPD,
15368 IX86_BUILTIN_CMPNGTPD,
15369 IX86_BUILTIN_CMPNGEPD,
15370 IX86_BUILTIN_CMPORDPD,
15371 IX86_BUILTIN_CMPUNORDPD,
15372 IX86_BUILTIN_CMPNEPD,
15373 IX86_BUILTIN_CMPEQSD,
15374 IX86_BUILTIN_CMPLTSD,
15375 IX86_BUILTIN_CMPLESD,
15376 IX86_BUILTIN_CMPNEQSD,
15377 IX86_BUILTIN_CMPNLTSD,
15378 IX86_BUILTIN_CMPNLESD,
15379 IX86_BUILTIN_CMPORDSD,
15380 IX86_BUILTIN_CMPUNORDSD,
15381 IX86_BUILTIN_CMPNESD,
15383 IX86_BUILTIN_COMIEQSD,
15384 IX86_BUILTIN_COMILTSD,
15385 IX86_BUILTIN_COMILESD,
15386 IX86_BUILTIN_COMIGTSD,
15387 IX86_BUILTIN_COMIGESD,
15388 IX86_BUILTIN_COMINEQSD,
15389 IX86_BUILTIN_UCOMIEQSD,
15390 IX86_BUILTIN_UCOMILTSD,
15391 IX86_BUILTIN_UCOMILESD,
15392 IX86_BUILTIN_UCOMIGTSD,
15393 IX86_BUILTIN_UCOMIGESD,
15394 IX86_BUILTIN_UCOMINEQSD,
15396 IX86_BUILTIN_MAXPD,
15397 IX86_BUILTIN_MAXSD,
15398 IX86_BUILTIN_MINPD,
15399 IX86_BUILTIN_MINSD,
15401 IX86_BUILTIN_ANDPD,
15402 IX86_BUILTIN_ANDNPD,
15404 IX86_BUILTIN_XORPD,
15406 IX86_BUILTIN_SQRTPD,
15407 IX86_BUILTIN_SQRTSD,
15409 IX86_BUILTIN_UNPCKHPD,
15410 IX86_BUILTIN_UNPCKLPD,
15412 IX86_BUILTIN_SHUFPD,
15414 IX86_BUILTIN_LOADUPD,
15415 IX86_BUILTIN_STOREUPD,
15416 IX86_BUILTIN_MOVSD,
15418 IX86_BUILTIN_LOADHPD,
15419 IX86_BUILTIN_LOADLPD,
15421 IX86_BUILTIN_CVTDQ2PD,
15422 IX86_BUILTIN_CVTDQ2PS,
15424 IX86_BUILTIN_CVTPD2DQ,
15425 IX86_BUILTIN_CVTPD2PI,
15426 IX86_BUILTIN_CVTPD2PS,
15427 IX86_BUILTIN_CVTTPD2DQ,
15428 IX86_BUILTIN_CVTTPD2PI,
15430 IX86_BUILTIN_CVTPI2PD,
15431 IX86_BUILTIN_CVTSI2SD,
15432 IX86_BUILTIN_CVTSI642SD,
15434 IX86_BUILTIN_CVTSD2SI,
15435 IX86_BUILTIN_CVTSD2SI64,
15436 IX86_BUILTIN_CVTSD2SS,
15437 IX86_BUILTIN_CVTSS2SD,
15438 IX86_BUILTIN_CVTTSD2SI,
15439 IX86_BUILTIN_CVTTSD2SI64,
15441 IX86_BUILTIN_CVTPS2DQ,
15442 IX86_BUILTIN_CVTPS2PD,
15443 IX86_BUILTIN_CVTTPS2DQ,
15445 IX86_BUILTIN_MOVNTI,
15446 IX86_BUILTIN_MOVNTPD,
15447 IX86_BUILTIN_MOVNTDQ,
15450 IX86_BUILTIN_MASKMOVDQU,
15451 IX86_BUILTIN_MOVMSKPD,
15452 IX86_BUILTIN_PMOVMSKB128,
15454 IX86_BUILTIN_PACKSSWB128,
15455 IX86_BUILTIN_PACKSSDW128,
15456 IX86_BUILTIN_PACKUSWB128,
15458 IX86_BUILTIN_PADDB128,
15459 IX86_BUILTIN_PADDW128,
15460 IX86_BUILTIN_PADDD128,
15461 IX86_BUILTIN_PADDQ128,
15462 IX86_BUILTIN_PADDSB128,
15463 IX86_BUILTIN_PADDSW128,
15464 IX86_BUILTIN_PADDUSB128,
15465 IX86_BUILTIN_PADDUSW128,
15466 IX86_BUILTIN_PSUBB128,
15467 IX86_BUILTIN_PSUBW128,
15468 IX86_BUILTIN_PSUBD128,
15469 IX86_BUILTIN_PSUBQ128,
15470 IX86_BUILTIN_PSUBSB128,
15471 IX86_BUILTIN_PSUBSW128,
15472 IX86_BUILTIN_PSUBUSB128,
15473 IX86_BUILTIN_PSUBUSW128,
15475 IX86_BUILTIN_PAND128,
15476 IX86_BUILTIN_PANDN128,
15477 IX86_BUILTIN_POR128,
15478 IX86_BUILTIN_PXOR128,
15480 IX86_BUILTIN_PAVGB128,
15481 IX86_BUILTIN_PAVGW128,
15483 IX86_BUILTIN_PCMPEQB128,
15484 IX86_BUILTIN_PCMPEQW128,
15485 IX86_BUILTIN_PCMPEQD128,
15486 IX86_BUILTIN_PCMPGTB128,
15487 IX86_BUILTIN_PCMPGTW128,
15488 IX86_BUILTIN_PCMPGTD128,
15490 IX86_BUILTIN_PMADDWD128,
15492 IX86_BUILTIN_PMAXSW128,
15493 IX86_BUILTIN_PMAXUB128,
15494 IX86_BUILTIN_PMINSW128,
15495 IX86_BUILTIN_PMINUB128,
15497 IX86_BUILTIN_PMULUDQ,
15498 IX86_BUILTIN_PMULUDQ128,
15499 IX86_BUILTIN_PMULHUW128,
15500 IX86_BUILTIN_PMULHW128,
15501 IX86_BUILTIN_PMULLW128,
15503 IX86_BUILTIN_PSADBW128,
15504 IX86_BUILTIN_PSHUFHW,
15505 IX86_BUILTIN_PSHUFLW,
15506 IX86_BUILTIN_PSHUFD,
15508 IX86_BUILTIN_PSLLW128,
15509 IX86_BUILTIN_PSLLD128,
15510 IX86_BUILTIN_PSLLQ128,
15511 IX86_BUILTIN_PSRAW128,
15512 IX86_BUILTIN_PSRAD128,
15513 IX86_BUILTIN_PSRLW128,
15514 IX86_BUILTIN_PSRLD128,
15515 IX86_BUILTIN_PSRLQ128,
15516 IX86_BUILTIN_PSLLDQI128,
15517 IX86_BUILTIN_PSLLWI128,
15518 IX86_BUILTIN_PSLLDI128,
15519 IX86_BUILTIN_PSLLQI128,
15520 IX86_BUILTIN_PSRAWI128,
15521 IX86_BUILTIN_PSRADI128,
15522 IX86_BUILTIN_PSRLDQI128,
15523 IX86_BUILTIN_PSRLWI128,
15524 IX86_BUILTIN_PSRLDI128,
15525 IX86_BUILTIN_PSRLQI128,
15527 IX86_BUILTIN_PUNPCKHBW128,
15528 IX86_BUILTIN_PUNPCKHWD128,
15529 IX86_BUILTIN_PUNPCKHDQ128,
15530 IX86_BUILTIN_PUNPCKHQDQ128,
15531 IX86_BUILTIN_PUNPCKLBW128,
15532 IX86_BUILTIN_PUNPCKLWD128,
15533 IX86_BUILTIN_PUNPCKLDQ128,
15534 IX86_BUILTIN_PUNPCKLQDQ128,
15536 IX86_BUILTIN_CLFLUSH,
15537 IX86_BUILTIN_MFENCE,
15538 IX86_BUILTIN_LFENCE,
15540 /* Prescott New Instructions. */
15541 IX86_BUILTIN_ADDSUBPS,
15542 IX86_BUILTIN_HADDPS,
15543 IX86_BUILTIN_HSUBPS,
15544 IX86_BUILTIN_MOVSHDUP,
15545 IX86_BUILTIN_MOVSLDUP,
15546 IX86_BUILTIN_ADDSUBPD,
15547 IX86_BUILTIN_HADDPD,
15548 IX86_BUILTIN_HSUBPD,
15549 IX86_BUILTIN_LDDQU,
15551 IX86_BUILTIN_MONITOR,
15552 IX86_BUILTIN_MWAIT,
15555 IX86_BUILTIN_PHADDW,
15556 IX86_BUILTIN_PHADDD,
15557 IX86_BUILTIN_PHADDSW,
15558 IX86_BUILTIN_PHSUBW,
15559 IX86_BUILTIN_PHSUBD,
15560 IX86_BUILTIN_PHSUBSW,
15561 IX86_BUILTIN_PMADDUBSW,
15562 IX86_BUILTIN_PMULHRSW,
15563 IX86_BUILTIN_PSHUFB,
15564 IX86_BUILTIN_PSIGNB,
15565 IX86_BUILTIN_PSIGNW,
15566 IX86_BUILTIN_PSIGND,
15567 IX86_BUILTIN_PALIGNR,
15568 IX86_BUILTIN_PABSB,
15569 IX86_BUILTIN_PABSW,
15570 IX86_BUILTIN_PABSD,
15572 IX86_BUILTIN_PHADDW128,
15573 IX86_BUILTIN_PHADDD128,
15574 IX86_BUILTIN_PHADDSW128,
15575 IX86_BUILTIN_PHSUBW128,
15576 IX86_BUILTIN_PHSUBD128,
15577 IX86_BUILTIN_PHSUBSW128,
15578 IX86_BUILTIN_PMADDUBSW128,
15579 IX86_BUILTIN_PMULHRSW128,
15580 IX86_BUILTIN_PSHUFB128,
15581 IX86_BUILTIN_PSIGNB128,
15582 IX86_BUILTIN_PSIGNW128,
15583 IX86_BUILTIN_PSIGND128,
15584 IX86_BUILTIN_PALIGNR128,
15585 IX86_BUILTIN_PABSB128,
15586 IX86_BUILTIN_PABSW128,
15587 IX86_BUILTIN_PABSD128,
15589 IX86_BUILTIN_VEC_INIT_V2SI,
15590 IX86_BUILTIN_VEC_INIT_V4HI,
15591 IX86_BUILTIN_VEC_INIT_V8QI,
15592 IX86_BUILTIN_VEC_EXT_V2DF,
15593 IX86_BUILTIN_VEC_EXT_V2DI,
15594 IX86_BUILTIN_VEC_EXT_V4SF,
15595 IX86_BUILTIN_VEC_EXT_V4SI,
15596 IX86_BUILTIN_VEC_EXT_V8HI,
15597 IX86_BUILTIN_VEC_EXT_V2SI,
15598 IX86_BUILTIN_VEC_EXT_V4HI,
15599 IX86_BUILTIN_VEC_SET_V8HI,
15600 IX86_BUILTIN_VEC_SET_V4HI,
15605 /* Table for the ix86 builtin decls. */
15606 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
15608 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
15609 * if the target_flags include one of MASK. Stores the function decl
15610 * in the ix86_builtins array.
15611 * Returns the function decl or NULL_TREE, if the builtin was not added. */
15614 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
15616 tree decl = NULL_TREE;
15618 if (mask & target_flags
15619 && (!(mask & MASK_64BIT) || TARGET_64BIT))
15621 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
15623 ix86_builtins[(int) code] = decl;
15629 /* Like def_builtin, but also marks the function decl "const". */
15632 def_builtin_const (int mask, const char *name, tree type,
15633 enum ix86_builtins code)
15635 tree decl = def_builtin (mask, name, type, code);
15637 TREE_READONLY (decl) = 1;
15641 /* Bits for builtin_description.flag. */
15643 /* Set when we don't support the comparison natively, and should
15644 swap_comparison in order to support it. */
15645 #define BUILTIN_DESC_SWAP_OPERANDS 1
15647 struct builtin_description
15649 const unsigned int mask;
15650 const enum insn_code icode;
15651 const char *const name;
15652 const enum ix86_builtins code;
15653 const enum rtx_code comparison;
15654 const unsigned int flag;
15657 static const struct builtin_description bdesc_comi[] =
15659 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15660 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15661 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15662 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15663 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15664 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15665 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15666 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15667 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15668 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15669 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15670 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15671 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15672 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15673 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15674 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15675 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15676 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15677 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15678 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15679 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15680 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15681 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15682 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15685 static const struct builtin_description bdesc_2arg[] =
15688 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15689 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15690 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15691 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15692 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15693 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15694 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15695 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15697 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15698 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15699 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15700 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15701 BUILTIN_DESC_SWAP_OPERANDS },
15702 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15703 BUILTIN_DESC_SWAP_OPERANDS },
15704 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15705 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15706 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15707 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15708 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15709 BUILTIN_DESC_SWAP_OPERANDS },
15710 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15711 BUILTIN_DESC_SWAP_OPERANDS },
15712 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15713 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15714 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15715 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15716 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15717 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15718 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15719 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15720 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15721 BUILTIN_DESC_SWAP_OPERANDS },
15722 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15723 BUILTIN_DESC_SWAP_OPERANDS },
15724 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
15726 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15727 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15728 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15729 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15731 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15732 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15733 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15734 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15736 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15737 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15738 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15739 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15740 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15743 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15744 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15745 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15746 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15747 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15748 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15749 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15750 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15752 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15753 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15754 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15755 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15756 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15757 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15758 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15759 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15761 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15762 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15763 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15765 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15766 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15767 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15768 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15770 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15771 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15773 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15774 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15775 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15776 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15777 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15778 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15780 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15781 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15782 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15783 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15785 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15786 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15787 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15788 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15789 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15790 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15793 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15794 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15795 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15797 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15798 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15799 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15801 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15802 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15803 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15804 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15805 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15806 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15808 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15809 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15810 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15811 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15812 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15813 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15815 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15816 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15817 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15818 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15820 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15821 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15824 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15825 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15826 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15827 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15828 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15829 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15830 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15831 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15833 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15834 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15835 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15836 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15837 BUILTIN_DESC_SWAP_OPERANDS },
15838 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15839 BUILTIN_DESC_SWAP_OPERANDS },
15840 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15841 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15842 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15843 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15844 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15845 BUILTIN_DESC_SWAP_OPERANDS },
15846 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15847 BUILTIN_DESC_SWAP_OPERANDS },
15848 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15849 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15850 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15851 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15852 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15853 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15854 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15855 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15856 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15858 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15859 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15860 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15861 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15863 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15864 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15865 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15866 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15868 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15869 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15870 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15873 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15874 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15875 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15876 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15877 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15878 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15879 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15880 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15882 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15883 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15884 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15885 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15886 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15887 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15888 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15889 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15891 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15892 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15894 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15895 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15896 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15897 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15899 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15900 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15902 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15903 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15904 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15905 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15906 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15907 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15909 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15910 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15911 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15912 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15914 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15915 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15916 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15917 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15918 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15919 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15920 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15921 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15923 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15924 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15925 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15927 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15928 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15930 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15931 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15933 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15934 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15935 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15937 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15938 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15939 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15941 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15942 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15944 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15946 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15947 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15948 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15949 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15952 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15953 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15954 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15955 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15956 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15957 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15960 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15961 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15962 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15963 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15964 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15965 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15966 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15967 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15968 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15969 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15970 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15971 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15972 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15973 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15974 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15975 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15976 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15977 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15978 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15979 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15980 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15981 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15982 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15983 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15986 static const struct builtin_description bdesc_1arg[] =
15988 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15989 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15991 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15992 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15993 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15995 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15996 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15997 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15998 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15999 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16000 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16002 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16003 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16005 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16007 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16008 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16010 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16011 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16012 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16013 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16014 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16016 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16018 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16019 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16020 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16021 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16023 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16024 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16025 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16028 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
16029 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
16032 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16033 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16034 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16035 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16036 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16037 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16041 ix86_init_builtins (void)
16044 ix86_init_mmx_sse_builtins ();
16047 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16048 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16051 ix86_init_mmx_sse_builtins (void)
16053 const struct builtin_description * d;
16056 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
16057 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16058 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16059 tree V2DI_type_node
16060 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16061 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16062 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16063 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16064 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16065 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16066 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16068 tree pchar_type_node = build_pointer_type (char_type_node);
16069 tree pcchar_type_node = build_pointer_type (
16070 build_type_variant (char_type_node, 1, 0));
16071 tree pfloat_type_node = build_pointer_type (float_type_node);
16072 tree pcfloat_type_node = build_pointer_type (
16073 build_type_variant (float_type_node, 1, 0));
16074 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16075 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16076 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16079 tree int_ftype_v4sf_v4sf
16080 = build_function_type_list (integer_type_node,
16081 V4SF_type_node, V4SF_type_node, NULL_TREE);
16082 tree v4si_ftype_v4sf_v4sf
16083 = build_function_type_list (V4SI_type_node,
16084 V4SF_type_node, V4SF_type_node, NULL_TREE);
16085 /* MMX/SSE/integer conversions. */
16086 tree int_ftype_v4sf
16087 = build_function_type_list (integer_type_node,
16088 V4SF_type_node, NULL_TREE);
16089 tree int64_ftype_v4sf
16090 = build_function_type_list (long_long_integer_type_node,
16091 V4SF_type_node, NULL_TREE);
16092 tree int_ftype_v8qi
16093 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16094 tree v4sf_ftype_v4sf_int
16095 = build_function_type_list (V4SF_type_node,
16096 V4SF_type_node, integer_type_node, NULL_TREE);
16097 tree v4sf_ftype_v4sf_int64
16098 = build_function_type_list (V4SF_type_node,
16099 V4SF_type_node, long_long_integer_type_node,
16101 tree v4sf_ftype_v4sf_v2si
16102 = build_function_type_list (V4SF_type_node,
16103 V4SF_type_node, V2SI_type_node, NULL_TREE);
16105 /* Miscellaneous. */
16106 tree v8qi_ftype_v4hi_v4hi
16107 = build_function_type_list (V8QI_type_node,
16108 V4HI_type_node, V4HI_type_node, NULL_TREE);
16109 tree v4hi_ftype_v2si_v2si
16110 = build_function_type_list (V4HI_type_node,
16111 V2SI_type_node, V2SI_type_node, NULL_TREE);
16112 tree v4sf_ftype_v4sf_v4sf_int
16113 = build_function_type_list (V4SF_type_node,
16114 V4SF_type_node, V4SF_type_node,
16115 integer_type_node, NULL_TREE);
16116 tree v2si_ftype_v4hi_v4hi
16117 = build_function_type_list (V2SI_type_node,
16118 V4HI_type_node, V4HI_type_node, NULL_TREE);
16119 tree v4hi_ftype_v4hi_int
16120 = build_function_type_list (V4HI_type_node,
16121 V4HI_type_node, integer_type_node, NULL_TREE);
16122 tree v4hi_ftype_v4hi_di
16123 = build_function_type_list (V4HI_type_node,
16124 V4HI_type_node, long_long_unsigned_type_node,
16126 tree v2si_ftype_v2si_di
16127 = build_function_type_list (V2SI_type_node,
16128 V2SI_type_node, long_long_unsigned_type_node,
16130 tree void_ftype_void
16131 = build_function_type (void_type_node, void_list_node);
16132 tree void_ftype_unsigned
16133 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16134 tree void_ftype_unsigned_unsigned
16135 = build_function_type_list (void_type_node, unsigned_type_node,
16136 unsigned_type_node, NULL_TREE);
16137 tree void_ftype_pcvoid_unsigned_unsigned
16138 = build_function_type_list (void_type_node, const_ptr_type_node,
16139 unsigned_type_node, unsigned_type_node,
16141 tree unsigned_ftype_void
16142 = build_function_type (unsigned_type_node, void_list_node);
16143 tree v2si_ftype_v4sf
16144 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16145 /* Loads/stores. */
16146 tree void_ftype_v8qi_v8qi_pchar
16147 = build_function_type_list (void_type_node,
16148 V8QI_type_node, V8QI_type_node,
16149 pchar_type_node, NULL_TREE);
16150 tree v4sf_ftype_pcfloat
16151 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16152 /* @@@ the type is bogus */
16153 tree v4sf_ftype_v4sf_pv2si
16154 = build_function_type_list (V4SF_type_node,
16155 V4SF_type_node, pv2si_type_node, NULL_TREE);
16156 tree void_ftype_pv2si_v4sf
16157 = build_function_type_list (void_type_node,
16158 pv2si_type_node, V4SF_type_node, NULL_TREE);
16159 tree void_ftype_pfloat_v4sf
16160 = build_function_type_list (void_type_node,
16161 pfloat_type_node, V4SF_type_node, NULL_TREE);
16162 tree void_ftype_pdi_di
16163 = build_function_type_list (void_type_node,
16164 pdi_type_node, long_long_unsigned_type_node,
16166 tree void_ftype_pv2di_v2di
16167 = build_function_type_list (void_type_node,
16168 pv2di_type_node, V2DI_type_node, NULL_TREE);
16169 /* Normal vector unops. */
16170 tree v4sf_ftype_v4sf
16171 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16172 tree v16qi_ftype_v16qi
16173 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16174 tree v8hi_ftype_v8hi
16175 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16176 tree v4si_ftype_v4si
16177 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16178 tree v8qi_ftype_v8qi
16179 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16180 tree v4hi_ftype_v4hi
16181 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16183 /* Normal vector binops. */
16184 tree v4sf_ftype_v4sf_v4sf
16185 = build_function_type_list (V4SF_type_node,
16186 V4SF_type_node, V4SF_type_node, NULL_TREE);
16187 tree v8qi_ftype_v8qi_v8qi
16188 = build_function_type_list (V8QI_type_node,
16189 V8QI_type_node, V8QI_type_node, NULL_TREE);
16190 tree v4hi_ftype_v4hi_v4hi
16191 = build_function_type_list (V4HI_type_node,
16192 V4HI_type_node, V4HI_type_node, NULL_TREE);
16193 tree v2si_ftype_v2si_v2si
16194 = build_function_type_list (V2SI_type_node,
16195 V2SI_type_node, V2SI_type_node, NULL_TREE);
16196 tree di_ftype_di_di
16197 = build_function_type_list (long_long_unsigned_type_node,
16198 long_long_unsigned_type_node,
16199 long_long_unsigned_type_node, NULL_TREE);
16201 tree di_ftype_di_di_int
16202 = build_function_type_list (long_long_unsigned_type_node,
16203 long_long_unsigned_type_node,
16204 long_long_unsigned_type_node,
16205 integer_type_node, NULL_TREE);
16207 tree v2si_ftype_v2sf
16208 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16209 tree v2sf_ftype_v2si
16210 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16211 tree v2si_ftype_v2si
16212 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16213 tree v2sf_ftype_v2sf
16214 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16215 tree v2sf_ftype_v2sf_v2sf
16216 = build_function_type_list (V2SF_type_node,
16217 V2SF_type_node, V2SF_type_node, NULL_TREE);
16218 tree v2si_ftype_v2sf_v2sf
16219 = build_function_type_list (V2SI_type_node,
16220 V2SF_type_node, V2SF_type_node, NULL_TREE);
16221 tree pint_type_node = build_pointer_type (integer_type_node);
16222 tree pdouble_type_node = build_pointer_type (double_type_node);
16223 tree pcdouble_type_node = build_pointer_type (
16224 build_type_variant (double_type_node, 1, 0));
16225 tree int_ftype_v2df_v2df
16226 = build_function_type_list (integer_type_node,
16227 V2DF_type_node, V2DF_type_node, NULL_TREE);
16229 tree void_ftype_pcvoid
16230 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16231 tree v4sf_ftype_v4si
16232 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16233 tree v4si_ftype_v4sf
16234 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16235 tree v2df_ftype_v4si
16236 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16237 tree v4si_ftype_v2df
16238 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16239 tree v2si_ftype_v2df
16240 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16241 tree v4sf_ftype_v2df
16242 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16243 tree v2df_ftype_v2si
16244 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16245 tree v2df_ftype_v4sf
16246 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16247 tree int_ftype_v2df
16248 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16249 tree int64_ftype_v2df
16250 = build_function_type_list (long_long_integer_type_node,
16251 V2DF_type_node, NULL_TREE);
16252 tree v2df_ftype_v2df_int
16253 = build_function_type_list (V2DF_type_node,
16254 V2DF_type_node, integer_type_node, NULL_TREE);
16255 tree v2df_ftype_v2df_int64
16256 = build_function_type_list (V2DF_type_node,
16257 V2DF_type_node, long_long_integer_type_node,
16259 tree v4sf_ftype_v4sf_v2df
16260 = build_function_type_list (V4SF_type_node,
16261 V4SF_type_node, V2DF_type_node, NULL_TREE);
16262 tree v2df_ftype_v2df_v4sf
16263 = build_function_type_list (V2DF_type_node,
16264 V2DF_type_node, V4SF_type_node, NULL_TREE);
16265 tree v2df_ftype_v2df_v2df_int
16266 = build_function_type_list (V2DF_type_node,
16267 V2DF_type_node, V2DF_type_node,
16270 tree v2df_ftype_v2df_pcdouble
16271 = build_function_type_list (V2DF_type_node,
16272 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16273 tree void_ftype_pdouble_v2df
16274 = build_function_type_list (void_type_node,
16275 pdouble_type_node, V2DF_type_node, NULL_TREE);
16276 tree void_ftype_pint_int
16277 = build_function_type_list (void_type_node,
16278 pint_type_node, integer_type_node, NULL_TREE);
16279 tree void_ftype_v16qi_v16qi_pchar
16280 = build_function_type_list (void_type_node,
16281 V16QI_type_node, V16QI_type_node,
16282 pchar_type_node, NULL_TREE);
16283 tree v2df_ftype_pcdouble
16284 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16285 tree v2df_ftype_v2df_v2df
16286 = build_function_type_list (V2DF_type_node,
16287 V2DF_type_node, V2DF_type_node, NULL_TREE);
16288 tree v16qi_ftype_v16qi_v16qi
16289 = build_function_type_list (V16QI_type_node,
16290 V16QI_type_node, V16QI_type_node, NULL_TREE);
16291 tree v8hi_ftype_v8hi_v8hi
16292 = build_function_type_list (V8HI_type_node,
16293 V8HI_type_node, V8HI_type_node, NULL_TREE);
16294 tree v4si_ftype_v4si_v4si
16295 = build_function_type_list (V4SI_type_node,
16296 V4SI_type_node, V4SI_type_node, NULL_TREE);
16297 tree v2di_ftype_v2di_v2di
16298 = build_function_type_list (V2DI_type_node,
16299 V2DI_type_node, V2DI_type_node, NULL_TREE);
16300 tree v2di_ftype_v2df_v2df
16301 = build_function_type_list (V2DI_type_node,
16302 V2DF_type_node, V2DF_type_node, NULL_TREE);
16303 tree v2df_ftype_v2df
16304 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16305 tree v2di_ftype_v2di_int
16306 = build_function_type_list (V2DI_type_node,
16307 V2DI_type_node, integer_type_node, NULL_TREE);
16308 tree v2di_ftype_v2di_v2di_int
16309 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16310 V2DI_type_node, integer_type_node, NULL_TREE);
16311 tree v4si_ftype_v4si_int
16312 = build_function_type_list (V4SI_type_node,
16313 V4SI_type_node, integer_type_node, NULL_TREE);
16314 tree v8hi_ftype_v8hi_int
16315 = build_function_type_list (V8HI_type_node,
16316 V8HI_type_node, integer_type_node, NULL_TREE);
16317 tree v8hi_ftype_v8hi_v2di
16318 = build_function_type_list (V8HI_type_node,
16319 V8HI_type_node, V2DI_type_node, NULL_TREE);
16320 tree v4si_ftype_v4si_v2di
16321 = build_function_type_list (V4SI_type_node,
16322 V4SI_type_node, V2DI_type_node, NULL_TREE);
16323 tree v4si_ftype_v8hi_v8hi
16324 = build_function_type_list (V4SI_type_node,
16325 V8HI_type_node, V8HI_type_node, NULL_TREE);
16326 tree di_ftype_v8qi_v8qi
16327 = build_function_type_list (long_long_unsigned_type_node,
16328 V8QI_type_node, V8QI_type_node, NULL_TREE);
16329 tree di_ftype_v2si_v2si
16330 = build_function_type_list (long_long_unsigned_type_node,
16331 V2SI_type_node, V2SI_type_node, NULL_TREE);
16332 tree v2di_ftype_v16qi_v16qi
16333 = build_function_type_list (V2DI_type_node,
16334 V16QI_type_node, V16QI_type_node, NULL_TREE);
16335 tree v2di_ftype_v4si_v4si
16336 = build_function_type_list (V2DI_type_node,
16337 V4SI_type_node, V4SI_type_node, NULL_TREE);
16338 tree int_ftype_v16qi
16339 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16340 tree v16qi_ftype_pcchar
16341 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16342 tree void_ftype_pchar_v16qi
16343 = build_function_type_list (void_type_node,
16344 pchar_type_node, V16QI_type_node, NULL_TREE);
16347 tree float128_type;
16350 /* The __float80 type. */
16351 if (TYPE_MODE (long_double_type_node) == XFmode)
16352 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16356 /* The __float80 type. */
16357 float80_type = make_node (REAL_TYPE);
16358 TYPE_PRECISION (float80_type) = 80;
16359 layout_type (float80_type);
16360 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16365 float128_type = make_node (REAL_TYPE);
16366 TYPE_PRECISION (float128_type) = 128;
16367 layout_type (float128_type);
16368 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16371 /* Add all builtins that are more or less simple operations on two
16373 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16375 /* Use one of the operands; the target can have a different mode for
16376 mask-generating compares. */
16377 enum machine_mode mode;
16382 mode = insn_data[d->icode].operand[1].mode;
16387 type = v16qi_ftype_v16qi_v16qi;
16390 type = v8hi_ftype_v8hi_v8hi;
16393 type = v4si_ftype_v4si_v4si;
16396 type = v2di_ftype_v2di_v2di;
16399 type = v2df_ftype_v2df_v2df;
16402 type = v4sf_ftype_v4sf_v4sf;
16405 type = v8qi_ftype_v8qi_v8qi;
16408 type = v4hi_ftype_v4hi_v4hi;
16411 type = v2si_ftype_v2si_v2si;
16414 type = di_ftype_di_di;
16418 gcc_unreachable ();
16421 /* Override for comparisons. */
16422 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16423 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16424 type = v4si_ftype_v4sf_v4sf;
16426 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16427 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16428 type = v2di_ftype_v2df_v2df;
16430 def_builtin (d->mask, d->name, type, d->code);
16433 /* Add all builtins that are more or less simple operations on 1 operand. */
16434 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16436 enum machine_mode mode;
16441 mode = insn_data[d->icode].operand[1].mode;
16446 type = v16qi_ftype_v16qi;
16449 type = v8hi_ftype_v8hi;
16452 type = v4si_ftype_v4si;
16455 type = v2df_ftype_v2df;
16458 type = v4sf_ftype_v4sf;
16461 type = v8qi_ftype_v8qi;
16464 type = v4hi_ftype_v4hi;
16467 type = v2si_ftype_v2si;
16474 def_builtin (d->mask, d->name, type, d->code);
16477 /* Add the remaining MMX insns with somewhat more complicated types. */
16478 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16479 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16480 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16481 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16483 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16484 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16485 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16487 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16488 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16490 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16491 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16493 /* comi/ucomi insns. */
16494 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16495 if (d->mask == MASK_SSE2)
16496 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
16498 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
16500 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
16501 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
16502 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
16504 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
16505 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
16506 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
16507 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
16508 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
16509 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
16510 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
16511 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
16512 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
16513 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
16514 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
16516 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
16518 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
16519 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
16521 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
16522 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
16523 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
16524 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
16526 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
16527 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
16528 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
16529 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
16531 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
16533 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
16535 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
16536 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
16537 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
16538 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
16539 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
16540 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
16542 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
16544 /* Original 3DNow! */
16545 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
16546 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
16547 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
16548 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
16549 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
16550 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
16551 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
16552 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
16553 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
16554 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
16555 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
16556 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
16557 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
16558 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
16559 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
16560 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
16561 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
16562 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
16563 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
16564 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
16566 /* 3DNow! extension as used in the Athlon CPU. */
16567 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
16568 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
16569 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
16570 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
16571 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
16572 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
16575 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
16577 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
16578 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
16580 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
16581 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
16583 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
16584 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
16585 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
16586 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
16587 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
16589 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
16590 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
16591 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
16592 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
16594 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
16595 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
16597 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
16599 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
16600 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
16602 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
16603 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
16604 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
16605 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
16606 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
16608 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
16610 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
16611 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
16612 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
16613 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
16615 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
16616 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
16617 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
16619 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
16620 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
16621 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
16622 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
16624 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
16625 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
16626 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
16628 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
16629 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
16631 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
16632 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
16634 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
16635 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
16636 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
16638 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
16639 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
16640 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16642 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
16643 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
16645 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16646 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16647 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16648 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16650 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16651 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16652 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16653 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16655 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16656 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16658 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16660 /* Prescott New Instructions. */
16661 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16662 void_ftype_pcvoid_unsigned_unsigned,
16663 IX86_BUILTIN_MONITOR);
16664 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16665 void_ftype_unsigned_unsigned,
16666 IX86_BUILTIN_MWAIT);
16667 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16669 IX86_BUILTIN_MOVSHDUP);
16670 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16672 IX86_BUILTIN_MOVSLDUP);
16673 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16674 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16677 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16678 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16679 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16680 IX86_BUILTIN_PALIGNR);
16682 /* Access to the vec_init patterns. */
16683 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16684 integer_type_node, NULL_TREE);
16685 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16686 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16688 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16689 short_integer_type_node,
16690 short_integer_type_node,
16691 short_integer_type_node, NULL_TREE);
16692 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16693 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16695 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16696 char_type_node, char_type_node,
16697 char_type_node, char_type_node,
16698 char_type_node, char_type_node,
16699 char_type_node, NULL_TREE);
16700 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16701 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16703 /* Access to the vec_extract patterns. */
16704 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16705 integer_type_node, NULL_TREE);
16706 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
16707 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16709 ftype = build_function_type_list (long_long_integer_type_node,
16710 V2DI_type_node, integer_type_node,
16712 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
16713 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16715 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16716 integer_type_node, NULL_TREE);
16717 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16718 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16720 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16721 integer_type_node, NULL_TREE);
16722 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
16723 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16725 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16726 integer_type_node, NULL_TREE);
16727 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
16728 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16730 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16731 integer_type_node, NULL_TREE);
16732 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16733 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16735 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16736 integer_type_node, NULL_TREE);
16737 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16738 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16740 /* Access to the vec_set patterns. */
16741 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16743 integer_type_node, NULL_TREE);
16744 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
16745 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16747 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16749 integer_type_node, NULL_TREE);
16750 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16751 ftype, IX86_BUILTIN_VEC_SET_V4HI);
16754 /* Errors in the source file can cause expand_expr to return const0_rtx
16755 where we expect a vector. To avoid crashing, use one of the vector
16756 clear instructions. */
16758 safe_vector_operand (rtx x, enum machine_mode mode)
16760 if (x == const0_rtx)
16761 x = CONST0_RTX (mode);
16765 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
16768 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16771 tree arg0 = TREE_VALUE (arglist);
16772 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16773 rtx op0 = expand_normal (arg0);
16774 rtx op1 = expand_normal (arg1);
16775 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16776 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16777 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16779 if (VECTOR_MODE_P (mode0))
16780 op0 = safe_vector_operand (op0, mode0);
16781 if (VECTOR_MODE_P (mode1))
16782 op1 = safe_vector_operand (op1, mode1);
16784 if (optimize || !target
16785 || GET_MODE (target) != tmode
16786 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16787 target = gen_reg_rtx (tmode);
16789 if (GET_MODE (op1) == SImode && mode1 == TImode)
16791 rtx x = gen_reg_rtx (V4SImode);
16792 emit_insn (gen_sse2_loadd (x, op1));
16793 op1 = gen_lowpart (TImode, x);
16796 /* The insn must want input operands in the same modes as the
16798 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16799 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16801 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16802 op0 = copy_to_mode_reg (mode0, op0);
16803 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16804 op1 = copy_to_mode_reg (mode1, op1);
16806 /* ??? Using ix86_fixup_binary_operands is problematic when
16807 we've got mismatched modes. Fake it. */
16813 if (tmode == mode0 && tmode == mode1)
16815 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16819 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16821 op0 = force_reg (mode0, op0);
16822 op1 = force_reg (mode1, op1);
16823 target = gen_reg_rtx (tmode);
16826 pat = GEN_FCN (icode) (target, op0, op1);
16833 /* Subroutine of ix86_expand_builtin to take care of stores. */
16836 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16839 tree arg0 = TREE_VALUE (arglist);
16840 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16841 rtx op0 = expand_normal (arg0);
16842 rtx op1 = expand_normal (arg1);
16843 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16844 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16846 if (VECTOR_MODE_P (mode1))
16847 op1 = safe_vector_operand (op1, mode1);
16849 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16850 op1 = copy_to_mode_reg (mode1, op1);
16852 pat = GEN_FCN (icode) (op0, op1);
16858 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16861 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16862 rtx target, int do_load)
16865 tree arg0 = TREE_VALUE (arglist);
16866 rtx op0 = expand_normal (arg0);
16867 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16868 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16870 if (optimize || !target
16871 || GET_MODE (target) != tmode
16872 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16873 target = gen_reg_rtx (tmode);
16875 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16878 if (VECTOR_MODE_P (mode0))
16879 op0 = safe_vector_operand (op0, mode0);
16881 if ((optimize && !register_operand (op0, mode0))
16882 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16883 op0 = copy_to_mode_reg (mode0, op0);
16886 pat = GEN_FCN (icode) (target, op0);
16893 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16894 sqrtss, rsqrtss, rcpss. */
16897 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16900 tree arg0 = TREE_VALUE (arglist);
16901 rtx op1, op0 = expand_normal (arg0);
16902 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16903 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16905 if (optimize || !target
16906 || GET_MODE (target) != tmode
16907 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16908 target = gen_reg_rtx (tmode);
16910 if (VECTOR_MODE_P (mode0))
16911 op0 = safe_vector_operand (op0, mode0);
16913 if ((optimize && !register_operand (op0, mode0))
16914 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16915 op0 = copy_to_mode_reg (mode0, op0);
16918 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16919 op1 = copy_to_mode_reg (mode0, op1);
16921 pat = GEN_FCN (icode) (target, op0, op1);
16928 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16931 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16935 tree arg0 = TREE_VALUE (arglist);
16936 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16937 rtx op0 = expand_normal (arg0);
16938 rtx op1 = expand_normal (arg1);
16940 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16941 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16942 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16943 enum rtx_code comparison = d->comparison;
16945 if (VECTOR_MODE_P (mode0))
16946 op0 = safe_vector_operand (op0, mode0);
16947 if (VECTOR_MODE_P (mode1))
16948 op1 = safe_vector_operand (op1, mode1);
16950 /* Swap operands if we have a comparison that isn't available in
16952 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16954 rtx tmp = gen_reg_rtx (mode1);
16955 emit_move_insn (tmp, op1);
16960 if (optimize || !target
16961 || GET_MODE (target) != tmode
16962 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16963 target = gen_reg_rtx (tmode);
16965 if ((optimize && !register_operand (op0, mode0))
16966 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16967 op0 = copy_to_mode_reg (mode0, op0);
16968 if ((optimize && !register_operand (op1, mode1))
16969 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16970 op1 = copy_to_mode_reg (mode1, op1);
16972 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16973 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16980 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16983 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16987 tree arg0 = TREE_VALUE (arglist);
16988 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16989 rtx op0 = expand_normal (arg0);
16990 rtx op1 = expand_normal (arg1);
16992 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16993 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16994 enum rtx_code comparison = d->comparison;
16996 if (VECTOR_MODE_P (mode0))
16997 op0 = safe_vector_operand (op0, mode0);
16998 if (VECTOR_MODE_P (mode1))
16999 op1 = safe_vector_operand (op1, mode1);
17001 /* Swap operands if we have a comparison that isn't available in
17003 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17010 target = gen_reg_rtx (SImode);
17011 emit_move_insn (target, const0_rtx);
17012 target = gen_rtx_SUBREG (QImode, target, 0);
17014 if ((optimize && !register_operand (op0, mode0))
17015 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17016 op0 = copy_to_mode_reg (mode0, op0);
17017 if ((optimize && !register_operand (op1, mode1))
17018 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17019 op1 = copy_to_mode_reg (mode1, op1);
17021 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17022 pat = GEN_FCN (d->icode) (op0, op1);
17026 emit_insn (gen_rtx_SET (VOIDmode,
17027 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17028 gen_rtx_fmt_ee (comparison, QImode,
17032 return SUBREG_REG (target);
17035 /* Return the integer constant in ARG. Constrain it to be in the range
17036 of the subparts of VEC_TYPE; issue an error if not. */
17039 get_element_number (tree vec_type, tree arg)
17041 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17043 if (!host_integerp (arg, 1)
17044 || (elt = tree_low_cst (arg, 1), elt > max))
17046 error ("selector must be an integer constant in the range 0..%wi", max);
17053 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17054 ix86_expand_vector_init. We DO have language-level syntax for this, in
17055 the form of (type){ init-list }. Except that since we can't place emms
17056 instructions from inside the compiler, we can't allow the use of MMX
17057 registers unless the user explicitly asks for it. So we do *not* define
17058 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17059 we have builtins invoked by mmintrin.h that gives us license to emit
17060 these sorts of instructions. */
17063 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
17065 enum machine_mode tmode = TYPE_MODE (type);
17066 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17067 int i, n_elt = GET_MODE_NUNITS (tmode);
17068 rtvec v = rtvec_alloc (n_elt);
17070 gcc_assert (VECTOR_MODE_P (tmode));
17072 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
17074 rtx x = expand_normal (TREE_VALUE (arglist));
17075 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17078 gcc_assert (arglist == NULL);
17080 if (!target || !register_operand (target, tmode))
17081 target = gen_reg_rtx (tmode);
17083 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17087 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17088 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17089 had a language-level syntax for referencing vector elements. */
17092 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
17094 enum machine_mode tmode, mode0;
17099 arg0 = TREE_VALUE (arglist);
17100 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17102 op0 = expand_normal (arg0);
17103 elt = get_element_number (TREE_TYPE (arg0), arg1);
17105 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17106 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17107 gcc_assert (VECTOR_MODE_P (mode0));
17109 op0 = force_reg (mode0, op0);
17111 if (optimize || !target || !register_operand (target, tmode))
17112 target = gen_reg_rtx (tmode);
17114 ix86_expand_vector_extract (true, target, op0, elt);
17119 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17120 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17121 a language-level syntax for referencing vector elements. */
17124 ix86_expand_vec_set_builtin (tree arglist)
17126 enum machine_mode tmode, mode1;
17127 tree arg0, arg1, arg2;
17131 arg0 = TREE_VALUE (arglist);
17132 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17133 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17135 tmode = TYPE_MODE (TREE_TYPE (arg0));
17136 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17137 gcc_assert (VECTOR_MODE_P (tmode));
17139 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17140 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17141 elt = get_element_number (TREE_TYPE (arg0), arg2);
17143 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17144 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17146 op0 = force_reg (tmode, op0);
17147 op1 = force_reg (mode1, op1);
17149 ix86_expand_vector_set (true, op0, op1, elt);
17154 /* Expand an expression EXP that calls a built-in function,
17155 with result going to TARGET if that's convenient
17156 (and in mode MODE if that's convenient).
17157 SUBTARGET may be used as the target for computing one of EXP's operands.
17158 IGNORE is nonzero if the value is to be ignored. */
17161 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17162 enum machine_mode mode ATTRIBUTE_UNUSED,
17163 int ignore ATTRIBUTE_UNUSED)
17165 const struct builtin_description *d;
17167 enum insn_code icode;
17168 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
17169 tree arglist = TREE_OPERAND (exp, 1);
17170 tree arg0, arg1, arg2;
17171 rtx op0, op1, op2, pat;
17172 enum machine_mode tmode, mode0, mode1, mode2, mode3;
17173 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17177 case IX86_BUILTIN_EMMS:
17178 emit_insn (gen_mmx_emms ());
17181 case IX86_BUILTIN_SFENCE:
17182 emit_insn (gen_sse_sfence ());
17185 case IX86_BUILTIN_MASKMOVQ:
17186 case IX86_BUILTIN_MASKMOVDQU:
17187 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17188 ? CODE_FOR_mmx_maskmovq
17189 : CODE_FOR_sse2_maskmovdqu);
17190 /* Note the arg order is different from the operand order. */
17191 arg1 = TREE_VALUE (arglist);
17192 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
17193 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17194 op0 = expand_normal (arg0);
17195 op1 = expand_normal (arg1);
17196 op2 = expand_normal (arg2);
17197 mode0 = insn_data[icode].operand[0].mode;
17198 mode1 = insn_data[icode].operand[1].mode;
17199 mode2 = insn_data[icode].operand[2].mode;
17201 op0 = force_reg (Pmode, op0);
17202 op0 = gen_rtx_MEM (mode1, op0);
17204 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17205 op0 = copy_to_mode_reg (mode0, op0);
17206 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17207 op1 = copy_to_mode_reg (mode1, op1);
17208 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17209 op2 = copy_to_mode_reg (mode2, op2);
17210 pat = GEN_FCN (icode) (op0, op1, op2);
17216 case IX86_BUILTIN_SQRTSS:
17217 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
17218 case IX86_BUILTIN_RSQRTSS:
17219 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
17220 case IX86_BUILTIN_RCPSS:
17221 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
17223 case IX86_BUILTIN_LOADUPS:
17224 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
17226 case IX86_BUILTIN_STOREUPS:
17227 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
17229 case IX86_BUILTIN_LOADHPS:
17230 case IX86_BUILTIN_LOADLPS:
17231 case IX86_BUILTIN_LOADHPD:
17232 case IX86_BUILTIN_LOADLPD:
17233 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17234 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17235 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17236 : CODE_FOR_sse2_loadlpd);
17237 arg0 = TREE_VALUE (arglist);
17238 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17239 op0 = expand_normal (arg0);
17240 op1 = expand_normal (arg1);
17241 tmode = insn_data[icode].operand[0].mode;
17242 mode0 = insn_data[icode].operand[1].mode;
17243 mode1 = insn_data[icode].operand[2].mode;
17245 op0 = force_reg (mode0, op0);
17246 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17247 if (optimize || target == 0
17248 || GET_MODE (target) != tmode
17249 || !register_operand (target, tmode))
17250 target = gen_reg_rtx (tmode);
17251 pat = GEN_FCN (icode) (target, op0, op1);
17257 case IX86_BUILTIN_STOREHPS:
17258 case IX86_BUILTIN_STORELPS:
17259 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17260 : CODE_FOR_sse_storelps);
17261 arg0 = TREE_VALUE (arglist);
17262 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17263 op0 = expand_normal (arg0);
17264 op1 = expand_normal (arg1);
17265 mode0 = insn_data[icode].operand[0].mode;
17266 mode1 = insn_data[icode].operand[1].mode;
17268 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17269 op1 = force_reg (mode1, op1);
17271 pat = GEN_FCN (icode) (op0, op1);
17277 case IX86_BUILTIN_MOVNTPS:
17278 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
17279 case IX86_BUILTIN_MOVNTQ:
17280 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
17282 case IX86_BUILTIN_LDMXCSR:
17283 op0 = expand_normal (TREE_VALUE (arglist));
17284 target = assign_386_stack_local (SImode, SLOT_TEMP);
17285 emit_move_insn (target, op0);
17286 emit_insn (gen_sse_ldmxcsr (target));
17289 case IX86_BUILTIN_STMXCSR:
17290 target = assign_386_stack_local (SImode, SLOT_TEMP);
17291 emit_insn (gen_sse_stmxcsr (target));
17292 return copy_to_mode_reg (SImode, target);
17294 case IX86_BUILTIN_SHUFPS:
17295 case IX86_BUILTIN_SHUFPD:
17296 icode = (fcode == IX86_BUILTIN_SHUFPS
17297 ? CODE_FOR_sse_shufps
17298 : CODE_FOR_sse2_shufpd);
17299 arg0 = TREE_VALUE (arglist);
17300 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17301 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17302 op0 = expand_normal (arg0);
17303 op1 = expand_normal (arg1);
17304 op2 = expand_normal (arg2);
17305 tmode = insn_data[icode].operand[0].mode;
17306 mode0 = insn_data[icode].operand[1].mode;
17307 mode1 = insn_data[icode].operand[2].mode;
17308 mode2 = insn_data[icode].operand[3].mode;
17310 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17311 op0 = copy_to_mode_reg (mode0, op0);
17312 if ((optimize && !register_operand (op1, mode1))
17313 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17314 op1 = copy_to_mode_reg (mode1, op1);
17315 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17317 /* @@@ better error message */
17318 error ("mask must be an immediate");
17319 return gen_reg_rtx (tmode);
17321 if (optimize || target == 0
17322 || GET_MODE (target) != tmode
17323 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17324 target = gen_reg_rtx (tmode);
17325 pat = GEN_FCN (icode) (target, op0, op1, op2);
17331 case IX86_BUILTIN_PSHUFW:
17332 case IX86_BUILTIN_PSHUFD:
17333 case IX86_BUILTIN_PSHUFHW:
17334 case IX86_BUILTIN_PSHUFLW:
17335 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17336 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17337 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17338 : CODE_FOR_mmx_pshufw);
17339 arg0 = TREE_VALUE (arglist);
17340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17341 op0 = expand_normal (arg0);
17342 op1 = expand_normal (arg1);
17343 tmode = insn_data[icode].operand[0].mode;
17344 mode1 = insn_data[icode].operand[1].mode;
17345 mode2 = insn_data[icode].operand[2].mode;
17347 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17348 op0 = copy_to_mode_reg (mode1, op0);
17349 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17351 /* @@@ better error message */
17352 error ("mask must be an immediate");
17356 || GET_MODE (target) != tmode
17357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17358 target = gen_reg_rtx (tmode);
17359 pat = GEN_FCN (icode) (target, op0, op1);
17365 case IX86_BUILTIN_PSLLDQI128:
17366 case IX86_BUILTIN_PSRLDQI128:
17367 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17368 : CODE_FOR_sse2_lshrti3);
17369 arg0 = TREE_VALUE (arglist);
17370 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17371 op0 = expand_normal (arg0);
17372 op1 = expand_normal (arg1);
17373 tmode = insn_data[icode].operand[0].mode;
17374 mode1 = insn_data[icode].operand[1].mode;
17375 mode2 = insn_data[icode].operand[2].mode;
17377 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17379 op0 = copy_to_reg (op0);
17380 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17382 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17384 error ("shift must be an immediate");
17387 target = gen_reg_rtx (V2DImode);
17388 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17394 case IX86_BUILTIN_FEMMS:
17395 emit_insn (gen_mmx_femms ());
17398 case IX86_BUILTIN_PAVGUSB:
17399 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
17401 case IX86_BUILTIN_PF2ID:
17402 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
17404 case IX86_BUILTIN_PFACC:
17405 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
17407 case IX86_BUILTIN_PFADD:
17408 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
17410 case IX86_BUILTIN_PFCMPEQ:
17411 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
17413 case IX86_BUILTIN_PFCMPGE:
17414 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
17416 case IX86_BUILTIN_PFCMPGT:
17417 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
17419 case IX86_BUILTIN_PFMAX:
17420 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
17422 case IX86_BUILTIN_PFMIN:
17423 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
17425 case IX86_BUILTIN_PFMUL:
17426 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
17428 case IX86_BUILTIN_PFRCP:
17429 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
17431 case IX86_BUILTIN_PFRCPIT1:
17432 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
17434 case IX86_BUILTIN_PFRCPIT2:
17435 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
17437 case IX86_BUILTIN_PFRSQIT1:
17438 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
17440 case IX86_BUILTIN_PFRSQRT:
17441 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
17443 case IX86_BUILTIN_PFSUB:
17444 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
17446 case IX86_BUILTIN_PFSUBR:
17447 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
17449 case IX86_BUILTIN_PI2FD:
17450 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
17452 case IX86_BUILTIN_PMULHRW:
17453 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
17455 case IX86_BUILTIN_PF2IW:
17456 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
17458 case IX86_BUILTIN_PFNACC:
17459 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
17461 case IX86_BUILTIN_PFPNACC:
17462 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
17464 case IX86_BUILTIN_PI2FW:
17465 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
17467 case IX86_BUILTIN_PSWAPDSI:
17468 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
17470 case IX86_BUILTIN_PSWAPDSF:
17471 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
17473 case IX86_BUILTIN_SQRTSD:
17474 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
17475 case IX86_BUILTIN_LOADUPD:
17476 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
17477 case IX86_BUILTIN_STOREUPD:
17478 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
17480 case IX86_BUILTIN_MFENCE:
17481 emit_insn (gen_sse2_mfence ());
17483 case IX86_BUILTIN_LFENCE:
17484 emit_insn (gen_sse2_lfence ());
17487 case IX86_BUILTIN_CLFLUSH:
17488 arg0 = TREE_VALUE (arglist);
17489 op0 = expand_normal (arg0);
17490 icode = CODE_FOR_sse2_clflush;
17491 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
17492 op0 = copy_to_mode_reg (Pmode, op0);
17494 emit_insn (gen_sse2_clflush (op0));
17497 case IX86_BUILTIN_MOVNTPD:
17498 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
17499 case IX86_BUILTIN_MOVNTDQ:
17500 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
17501 case IX86_BUILTIN_MOVNTI:
17502 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
17504 case IX86_BUILTIN_LOADDQU:
17505 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
17506 case IX86_BUILTIN_STOREDQU:
17507 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
17509 case IX86_BUILTIN_MONITOR:
17510 arg0 = TREE_VALUE (arglist);
17511 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17512 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17513 op0 = expand_normal (arg0);
17514 op1 = expand_normal (arg1);
17515 op2 = expand_normal (arg2);
17517 op0 = copy_to_mode_reg (Pmode, op0);
17519 op1 = copy_to_mode_reg (SImode, op1);
17521 op2 = copy_to_mode_reg (SImode, op2);
17523 emit_insn (gen_sse3_monitor (op0, op1, op2));
17525 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17528 case IX86_BUILTIN_MWAIT:
17529 arg0 = TREE_VALUE (arglist);
17530 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17531 op0 = expand_normal (arg0);
17532 op1 = expand_normal (arg1);
17534 op0 = copy_to_mode_reg (SImode, op0);
17536 op1 = copy_to_mode_reg (SImode, op1);
17537 emit_insn (gen_sse3_mwait (op0, op1));
17540 case IX86_BUILTIN_LDDQU:
17541 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17544 case IX86_BUILTIN_PALIGNR:
17545 case IX86_BUILTIN_PALIGNR128:
17546 if (fcode == IX86_BUILTIN_PALIGNR)
17548 icode = CODE_FOR_ssse3_palignrdi;
17553 icode = CODE_FOR_ssse3_palignrti;
17556 arg0 = TREE_VALUE (arglist);
17557 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17558 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17559 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17560 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17561 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17562 tmode = insn_data[icode].operand[0].mode;
17563 mode1 = insn_data[icode].operand[1].mode;
17564 mode2 = insn_data[icode].operand[2].mode;
17565 mode3 = insn_data[icode].operand[3].mode;
17567 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17569 op0 = copy_to_reg (op0);
17570 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17572 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17574 op1 = copy_to_reg (op1);
17575 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17577 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17579 error ("shift must be an immediate");
17582 target = gen_reg_rtx (mode);
17583 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17590 case IX86_BUILTIN_VEC_INIT_V2SI:
17591 case IX86_BUILTIN_VEC_INIT_V4HI:
17592 case IX86_BUILTIN_VEC_INIT_V8QI:
17593 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17595 case IX86_BUILTIN_VEC_EXT_V2DF:
17596 case IX86_BUILTIN_VEC_EXT_V2DI:
17597 case IX86_BUILTIN_VEC_EXT_V4SF:
17598 case IX86_BUILTIN_VEC_EXT_V4SI:
17599 case IX86_BUILTIN_VEC_EXT_V8HI:
17600 case IX86_BUILTIN_VEC_EXT_V2SI:
17601 case IX86_BUILTIN_VEC_EXT_V4HI:
17602 return ix86_expand_vec_ext_builtin (arglist, target);
17604 case IX86_BUILTIN_VEC_SET_V8HI:
17605 case IX86_BUILTIN_VEC_SET_V4HI:
17606 return ix86_expand_vec_set_builtin (arglist);
17612 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17613 if (d->code == fcode)
17615 /* Compares are treated specially. */
17616 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17617 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17618 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17619 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17620 return ix86_expand_sse_compare (d, arglist, target);
17622 return ix86_expand_binop_builtin (d->icode, arglist, target);
17625 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17626 if (d->code == fcode)
17627 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17629 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17630 if (d->code == fcode)
17631 return ix86_expand_sse_comi (d, arglist, target);
17633 gcc_unreachable ();
17636 /* Store OPERAND to the memory after reload is completed. This means
17637 that we can't easily use assign_stack_local. */
17639 ix86_force_to_memory (enum machine_mode mode, rtx operand)
17643 gcc_assert (reload_completed);
17644 if (TARGET_RED_ZONE)
17646 result = gen_rtx_MEM (mode,
17647 gen_rtx_PLUS (Pmode,
17649 GEN_INT (-RED_ZONE_SIZE)));
17650 emit_move_insn (result, operand);
17652 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17658 operand = gen_lowpart (DImode, operand);
17662 gen_rtx_SET (VOIDmode,
17663 gen_rtx_MEM (DImode,
17664 gen_rtx_PRE_DEC (DImode,
17665 stack_pointer_rtx)),
17669 gcc_unreachable ();
17671 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17680 split_di (&operand, 1, operands, operands + 1);
17682 gen_rtx_SET (VOIDmode,
17683 gen_rtx_MEM (SImode,
17684 gen_rtx_PRE_DEC (Pmode,
17685 stack_pointer_rtx)),
17688 gen_rtx_SET (VOIDmode,
17689 gen_rtx_MEM (SImode,
17690 gen_rtx_PRE_DEC (Pmode,
17691 stack_pointer_rtx)),
17696 /* Store HImodes as SImodes. */
17697 operand = gen_lowpart (SImode, operand);
17701 gen_rtx_SET (VOIDmode,
17702 gen_rtx_MEM (GET_MODE (operand),
17703 gen_rtx_PRE_DEC (SImode,
17704 stack_pointer_rtx)),
17708 gcc_unreachable ();
17710 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17715 /* Free operand from the memory. */
17717 ix86_free_from_memory (enum machine_mode mode)
17719 if (!TARGET_RED_ZONE)
17723 if (mode == DImode || TARGET_64BIT)
17727 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17728 to pop or add instruction if registers are available. */
17729 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17730 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17735 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17736 QImode must go into class Q_REGS.
17737 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17738 movdf to do mem-to-mem moves through integer regs. */
17740 ix86_preferred_reload_class (rtx x, enum reg_class class)
17742 enum machine_mode mode = GET_MODE (x);
17744 /* We're only allowed to return a subclass of CLASS. Many of the
17745 following checks fail for NO_REGS, so eliminate that early. */
17746 if (class == NO_REGS)
17749 /* All classes can load zeros. */
17750 if (x == CONST0_RTX (mode))
17753 /* Force constants into memory if we are loading a (nonzero) constant into
17754 an MMX or SSE register. This is because there are no MMX/SSE instructions
17755 to load from a constant. */
17757 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17760 /* Prefer SSE regs only, if we can use them for math. */
17761 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17762 return SSE_CLASS_P (class) ? class : NO_REGS;
17764 /* Floating-point constants need more complex checks. */
17765 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17767 /* General regs can load everything. */
17768 if (reg_class_subset_p (class, GENERAL_REGS))
17771 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17772 zero above. We only want to wind up preferring 80387 registers if
17773 we plan on doing computation with them. */
17775 && standard_80387_constant_p (x))
17777 /* Limit class to non-sse. */
17778 if (class == FLOAT_SSE_REGS)
17780 if (class == FP_TOP_SSE_REGS)
17782 if (class == FP_SECOND_SSE_REGS)
17783 return FP_SECOND_REG;
17784 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17791 /* Generally when we see PLUS here, it's the function invariant
17792 (plus soft-fp const_int). Which can only be computed into general
17794 if (GET_CODE (x) == PLUS)
17795 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17797 /* QImode constants are easy to load, but non-constant QImode data
17798 must go into Q_REGS. */
17799 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17801 if (reg_class_subset_p (class, Q_REGS))
17803 if (reg_class_subset_p (Q_REGS, class))
17811 /* Discourage putting floating-point values in SSE registers unless
17812 SSE math is being used, and likewise for the 387 registers. */
17814 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17816 enum machine_mode mode = GET_MODE (x);
17818 /* Restrict the output reload class to the register bank that we are doing
17819 math on. If we would like not to return a subset of CLASS, reject this
17820 alternative: if reload cannot do this, it will still use its choice. */
17821 mode = GET_MODE (x);
17822 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17823 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17825 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17827 if (class == FP_TOP_SSE_REGS)
17829 else if (class == FP_SECOND_SSE_REGS)
17830 return FP_SECOND_REG;
17832 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17838 /* If we are copying between general and FP registers, we need a memory
17839 location. The same is true for SSE and MMX registers.
17841 The macro can't work reliably when one of the CLASSES is class containing
17842 registers from multiple units (SSE, MMX, integer). We avoid this by never
17843 combining those units in single alternative in the machine description.
17844 Ensure that this constraint holds to avoid unexpected surprises.
17846 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17847 enforce these sanity checks. */
17850 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17851 enum machine_mode mode, int strict)
17853 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17854 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17855 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17856 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17857 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17858 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17860 gcc_assert (!strict);
17864 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17867 /* ??? This is a lie. We do have moves between mmx/general, and for
17868 mmx/sse2. But by saying we need secondary memory we discourage the
17869 register allocator from using the mmx registers unless needed. */
17870 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17873 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17875 /* SSE1 doesn't have any direct moves from other classes. */
17879 /* If the target says that inter-unit moves are more expensive
17880 than moving through memory, then don't generate them. */
17881 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17884 /* Between SSE and general, we have moves no larger than word size. */
17885 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17888 /* ??? For the cost of one register reformat penalty, we could use
17889 the same instructions to move SFmode and DFmode data, but the
17890 relevant move patterns don't support those alternatives. */
17891 if (mode == SFmode || mode == DFmode)
17898 /* Return true if the registers in CLASS cannot represent the change from
17899 modes FROM to TO. */
17902 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17903 enum reg_class class)
17908 /* x87 registers can't do subreg at all, as all values are reformatted
17909 to extended precision. */
17910 if (MAYBE_FLOAT_CLASS_P (class))
17913 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17915 /* Vector registers do not support QI or HImode loads. If we don't
17916 disallow a change to these modes, reload will assume it's ok to
17917 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17918 the vec_dupv4hi pattern. */
17919 if (GET_MODE_SIZE (from) < 4)
17922 /* Vector registers do not support subreg with nonzero offsets, which
17923 are otherwise valid for integer registers. Since we can't see
17924 whether we have a nonzero offset from here, prohibit all
17925 nonparadoxical subregs changing size. */
17926 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17933 /* Return the cost of moving data from a register in class CLASS1 to
17934 one in class CLASS2.
17936 It is not required that the cost always equal 2 when FROM is the same as TO;
17937 on some machines it is expensive to move between registers if they are not
17938 general registers. */
17941 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17942 enum reg_class class2)
17944 /* In case we require secondary memory, compute cost of the store followed
17945 by load. In order to avoid bad register allocation choices, we need
17946 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17948 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17952 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17953 MEMORY_MOVE_COST (mode, class1, 1));
17954 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17955 MEMORY_MOVE_COST (mode, class2, 1));
17957 /* In case of copying from general_purpose_register we may emit multiple
17958 stores followed by single load causing memory size mismatch stall.
17959 Count this as arbitrarily high cost of 20. */
17960 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17963 /* In the case of FP/MMX moves, the registers actually overlap, and we
17964 have to switch modes in order to treat them differently. */
17965 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17966 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17972 /* Moves between SSE/MMX and integer unit are expensive. */
17973 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17974 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17975 return ix86_cost->mmxsse_to_integer;
17976 if (MAYBE_FLOAT_CLASS_P (class1))
17977 return ix86_cost->fp_move;
17978 if (MAYBE_SSE_CLASS_P (class1))
17979 return ix86_cost->sse_move;
17980 if (MAYBE_MMX_CLASS_P (class1))
17981 return ix86_cost->mmx_move;
17985 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17988 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17990 /* Flags and only flags can only hold CCmode values. */
17991 if (CC_REGNO_P (regno))
17992 return GET_MODE_CLASS (mode) == MODE_CC;
17993 if (GET_MODE_CLASS (mode) == MODE_CC
17994 || GET_MODE_CLASS (mode) == MODE_RANDOM
17995 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17997 if (FP_REGNO_P (regno))
17998 return VALID_FP_MODE_P (mode);
17999 if (SSE_REGNO_P (regno))
18001 /* We implement the move patterns for all vector modes into and
18002 out of SSE registers, even when no operation instructions
18004 return (VALID_SSE_REG_MODE (mode)
18005 || VALID_SSE2_REG_MODE (mode)
18006 || VALID_MMX_REG_MODE (mode)
18007 || VALID_MMX_REG_MODE_3DNOW (mode));
18009 if (MMX_REGNO_P (regno))
18011 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18012 so if the register is available at all, then we can move data of
18013 the given mode into or out of it. */
18014 return (VALID_MMX_REG_MODE (mode)
18015 || VALID_MMX_REG_MODE_3DNOW (mode));
18018 if (mode == QImode)
18020 /* Take care for QImode values - they can be in non-QI regs,
18021 but then they do cause partial register stalls. */
18022 if (regno < 4 || TARGET_64BIT)
18024 if (!TARGET_PARTIAL_REG_STALL)
18026 return reload_in_progress || reload_completed;
18028 /* We handle both integer and floats in the general purpose registers. */
18029 else if (VALID_INT_MODE_P (mode))
18031 else if (VALID_FP_MODE_P (mode))
18033 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18034 on to use that value in smaller contexts, this can easily force a
18035 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18036 supporting DImode, allow it. */
18037 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18043 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18044 tieable integer mode. */
18047 ix86_tieable_integer_mode_p (enum machine_mode mode)
18056 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18059 return TARGET_64BIT;
18066 /* Return true if MODE1 is accessible in a register that can hold MODE2
18067 without copying. That is, all register classes that can hold MODE2
18068 can also hold MODE1. */
18071 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18073 if (mode1 == mode2)
18076 if (ix86_tieable_integer_mode_p (mode1)
18077 && ix86_tieable_integer_mode_p (mode2))
18080 /* MODE2 being XFmode implies fp stack or general regs, which means we
18081 can tie any smaller floating point modes to it. Note that we do not
18082 tie this with TFmode. */
18083 if (mode2 == XFmode)
18084 return mode1 == SFmode || mode1 == DFmode;
18086 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18087 that we can tie it with SFmode. */
18088 if (mode2 == DFmode)
18089 return mode1 == SFmode;
18091 /* If MODE2 is only appropriate for an SSE register, then tie with
18092 any other mode acceptable to SSE registers. */
18093 if (GET_MODE_SIZE (mode2) >= 8
18094 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18095 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
18097 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18098 with any other mode acceptable to MMX registers. */
18099 if (GET_MODE_SIZE (mode2) == 8
18100 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18101 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
18106 /* Return the cost of moving data of mode M between a
18107 register and memory. A value of 2 is the default; this cost is
18108 relative to those in `REGISTER_MOVE_COST'.
18110 If moving between registers and memory is more expensive than
18111 between two registers, you should define this macro to express the
18114 Model also increased moving costs of QImode registers in non
18118 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18120 if (FLOAT_CLASS_P (class))
18137 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18139 if (SSE_CLASS_P (class))
18142 switch (GET_MODE_SIZE (mode))
18156 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18158 if (MMX_CLASS_P (class))
18161 switch (GET_MODE_SIZE (mode))
18172 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18174 switch (GET_MODE_SIZE (mode))
18178 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18179 : ix86_cost->movzbl_load);
18181 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18182 : ix86_cost->int_store[0] + 4);
18185 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18187 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18188 if (mode == TFmode)
18190 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18191 * (((int) GET_MODE_SIZE (mode)
18192 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18196 /* Compute a (partial) cost for rtx X. Return true if the complete
18197 cost has been computed, and false if subexpressions should be
18198 scanned. In either case, *TOTAL contains the cost result. */
18201 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18203 enum machine_mode mode = GET_MODE (x);
18211 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18213 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18215 else if (flag_pic && SYMBOLIC_CONST (x)
18217 || (!GET_CODE (x) != LABEL_REF
18218 && (GET_CODE (x) != SYMBOL_REF
18219 || !SYMBOL_REF_LOCAL_P (x)))))
18226 if (mode == VOIDmode)
18229 switch (standard_80387_constant_p (x))
18234 default: /* Other constants */
18239 /* Start with (MEM (SYMBOL_REF)), since that's where
18240 it'll probably end up. Add a penalty for size. */
18241 *total = (COSTS_N_INSNS (1)
18242 + (flag_pic != 0 && !TARGET_64BIT)
18243 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18249 /* The zero extensions is often completely free on x86_64, so make
18250 it as cheap as possible. */
18251 if (TARGET_64BIT && mode == DImode
18252 && GET_MODE (XEXP (x, 0)) == SImode)
18254 else if (TARGET_ZERO_EXTEND_WITH_AND)
18255 *total = ix86_cost->add;
18257 *total = ix86_cost->movzx;
18261 *total = ix86_cost->movsx;
18265 if (GET_CODE (XEXP (x, 1)) == CONST_INT
18266 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18268 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18271 *total = ix86_cost->add;
18274 if ((value == 2 || value == 3)
18275 && ix86_cost->lea <= ix86_cost->shift_const)
18277 *total = ix86_cost->lea;
18287 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18289 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18291 if (INTVAL (XEXP (x, 1)) > 32)
18292 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18294 *total = ix86_cost->shift_const * 2;
18298 if (GET_CODE (XEXP (x, 1)) == AND)
18299 *total = ix86_cost->shift_var * 2;
18301 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
18306 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18307 *total = ix86_cost->shift_const;
18309 *total = ix86_cost->shift_var;
18314 if (FLOAT_MODE_P (mode))
18316 *total = ix86_cost->fmul;
18321 rtx op0 = XEXP (x, 0);
18322 rtx op1 = XEXP (x, 1);
18324 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18326 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18327 for (nbits = 0; value != 0; value &= value - 1)
18331 /* This is arbitrary. */
18334 /* Compute costs correctly for widening multiplication. */
18335 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
18336 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
18337 == GET_MODE_SIZE (mode))
18339 int is_mulwiden = 0;
18340 enum machine_mode inner_mode = GET_MODE (op0);
18342 if (GET_CODE (op0) == GET_CODE (op1))
18343 is_mulwiden = 1, op1 = XEXP (op1, 0);
18344 else if (GET_CODE (op1) == CONST_INT)
18346 if (GET_CODE (op0) == SIGN_EXTEND)
18347 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
18350 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
18354 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
18357 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
18358 + nbits * ix86_cost->mult_bit
18359 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
18368 if (FLOAT_MODE_P (mode))
18369 *total = ix86_cost->fdiv;
18371 *total = ix86_cost->divide[MODE_INDEX (mode)];
18375 if (FLOAT_MODE_P (mode))
18376 *total = ix86_cost->fadd;
18377 else if (GET_MODE_CLASS (mode) == MODE_INT
18378 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
18380 if (GET_CODE (XEXP (x, 0)) == PLUS
18381 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
18382 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
18383 && CONSTANT_P (XEXP (x, 1)))
18385 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
18386 if (val == 2 || val == 4 || val == 8)
18388 *total = ix86_cost->lea;
18389 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18390 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
18392 *total += rtx_cost (XEXP (x, 1), outer_code);
18396 else if (GET_CODE (XEXP (x, 0)) == MULT
18397 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
18399 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
18400 if (val == 2 || val == 4 || val == 8)
18402 *total = ix86_cost->lea;
18403 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18404 *total += rtx_cost (XEXP (x, 1), outer_code);
18408 else if (GET_CODE (XEXP (x, 0)) == PLUS)
18410 *total = ix86_cost->lea;
18411 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18412 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18413 *total += rtx_cost (XEXP (x, 1), outer_code);
18420 if (FLOAT_MODE_P (mode))
18422 *total = ix86_cost->fadd;
18430 if (!TARGET_64BIT && mode == DImode)
18432 *total = (ix86_cost->add * 2
18433 + (rtx_cost (XEXP (x, 0), outer_code)
18434 << (GET_MODE (XEXP (x, 0)) != DImode))
18435 + (rtx_cost (XEXP (x, 1), outer_code)
18436 << (GET_MODE (XEXP (x, 1)) != DImode)));
18442 if (FLOAT_MODE_P (mode))
18444 *total = ix86_cost->fchs;
18450 if (!TARGET_64BIT && mode == DImode)
18451 *total = ix86_cost->add * 2;
18453 *total = ix86_cost->add;
18457 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18458 && XEXP (XEXP (x, 0), 1) == const1_rtx
18459 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18460 && XEXP (x, 1) == const0_rtx)
18462 /* This kind of construct is implemented using test[bwl].
18463 Treat it as if we had an AND. */
18464 *total = (ix86_cost->add
18465 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18466 + rtx_cost (const1_rtx, outer_code));
18472 if (!TARGET_SSE_MATH
18474 || (mode == DFmode && !TARGET_SSE2))
18479 if (FLOAT_MODE_P (mode))
18480 *total = ix86_cost->fabs;
18484 if (FLOAT_MODE_P (mode))
18485 *total = ix86_cost->fsqrt;
18489 if (XINT (x, 1) == UNSPEC_TP)
18500 static int current_machopic_label_num;
18502 /* Given a symbol name and its associated stub, write out the
18503 definition of the stub. */
18506 machopic_output_stub (FILE *file, const char *symb, const char *stub)
18508 unsigned int length;
18509 char *binder_name, *symbol_name, lazy_ptr_name[32];
18510 int label = ++current_machopic_label_num;
18512 /* For 64-bit we shouldn't get here. */
18513 gcc_assert (!TARGET_64BIT);
18515 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18516 symb = (*targetm.strip_name_encoding) (symb);
18518 length = strlen (stub);
18519 binder_name = alloca (length + 32);
18520 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18522 length = strlen (symb);
18523 symbol_name = alloca (length + 32);
18524 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18526 sprintf (lazy_ptr_name, "L%d$lz", label);
18529 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18531 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18533 fprintf (file, "%s:\n", stub);
18534 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18538 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18539 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18540 fprintf (file, "\tjmp\t*%%edx\n");
18543 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18545 fprintf (file, "%s:\n", binder_name);
18549 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18550 fprintf (file, "\tpushl\t%%eax\n");
18553 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18555 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18557 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18558 fprintf (file, "%s:\n", lazy_ptr_name);
18559 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18560 fprintf (file, "\t.long %s\n", binder_name);
18564 darwin_x86_file_end (void)
18566 darwin_file_end ();
18569 #endif /* TARGET_MACHO */
18571 /* Order the registers for register allocator. */
18574 x86_order_regs_for_local_alloc (void)
18579 /* First allocate the local general purpose registers. */
18580 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18581 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18582 reg_alloc_order [pos++] = i;
18584 /* Global general purpose registers. */
18585 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18586 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18587 reg_alloc_order [pos++] = i;
18589 /* x87 registers come first in case we are doing FP math
18591 if (!TARGET_SSE_MATH)
18592 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18593 reg_alloc_order [pos++] = i;
18595 /* SSE registers. */
18596 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18597 reg_alloc_order [pos++] = i;
18598 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18599 reg_alloc_order [pos++] = i;
18601 /* x87 registers. */
18602 if (TARGET_SSE_MATH)
18603 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18604 reg_alloc_order [pos++] = i;
18606 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18607 reg_alloc_order [pos++] = i;
18609 /* Initialize the rest of array as we do not allocate some registers
18611 while (pos < FIRST_PSEUDO_REGISTER)
18612 reg_alloc_order [pos++] = 0;
18615 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18616 struct attribute_spec.handler. */
18618 ix86_handle_struct_attribute (tree *node, tree name,
18619 tree args ATTRIBUTE_UNUSED,
18620 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18623 if (DECL_P (*node))
18625 if (TREE_CODE (*node) == TYPE_DECL)
18626 type = &TREE_TYPE (*node);
18631 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18632 || TREE_CODE (*type) == UNION_TYPE)))
18634 warning (OPT_Wattributes, "%qs attribute ignored",
18635 IDENTIFIER_POINTER (name));
18636 *no_add_attrs = true;
18639 else if ((is_attribute_p ("ms_struct", name)
18640 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18641 || ((is_attribute_p ("gcc_struct", name)
18642 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18644 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18645 IDENTIFIER_POINTER (name));
18646 *no_add_attrs = true;
18653 ix86_ms_bitfield_layout_p (tree record_type)
18655 return (TARGET_MS_BITFIELD_LAYOUT &&
18656 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18657 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18660 /* Returns an expression indicating where the this parameter is
18661 located on entry to the FUNCTION. */
18664 x86_this_parameter (tree function)
18666 tree type = TREE_TYPE (function);
18670 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18671 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18674 if (ix86_function_regparm (type, function) > 0)
18678 parm = TYPE_ARG_TYPES (type);
18679 /* Figure out whether or not the function has a variable number of
18681 for (; parm; parm = TREE_CHAIN (parm))
18682 if (TREE_VALUE (parm) == void_type_node)
18684 /* If not, the this parameter is in the first argument. */
18688 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18690 return gen_rtx_REG (SImode, regno);
18694 if (aggregate_value_p (TREE_TYPE (type), type))
18695 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18697 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18700 /* Determine whether x86_output_mi_thunk can succeed. */
18703 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18704 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18705 HOST_WIDE_INT vcall_offset, tree function)
18707 /* 64-bit can handle anything. */
18711 /* For 32-bit, everything's fine if we have one free register. */
18712 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18715 /* Need a free register for vcall_offset. */
18719 /* Need a free register for GOT references. */
18720 if (flag_pic && !(*targetm.binds_local_p) (function))
18723 /* Otherwise ok. */
18727 /* Output the assembler code for a thunk function. THUNK_DECL is the
18728 declaration for the thunk function itself, FUNCTION is the decl for
18729 the target function. DELTA is an immediate constant offset to be
18730 added to THIS. If VCALL_OFFSET is nonzero, the word at
18731 *(*this + vcall_offset) should be added to THIS. */
18734 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18735 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18736 HOST_WIDE_INT vcall_offset, tree function)
18739 rtx this = x86_this_parameter (function);
18742 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18743 pull it in now and let DELTA benefit. */
18746 else if (vcall_offset)
18748 /* Put the this parameter into %eax. */
18750 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18751 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18754 this_reg = NULL_RTX;
18756 /* Adjust the this parameter by a fixed constant. */
18759 xops[0] = GEN_INT (delta);
18760 xops[1] = this_reg ? this_reg : this;
18763 if (!x86_64_general_operand (xops[0], DImode))
18765 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18767 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18771 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18774 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18777 /* Adjust the this parameter by a value stored in the vtable. */
18781 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18784 int tmp_regno = 2 /* ECX */;
18785 if (lookup_attribute ("fastcall",
18786 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18787 tmp_regno = 0 /* EAX */;
18788 tmp = gen_rtx_REG (SImode, tmp_regno);
18791 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18794 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18796 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18798 /* Adjust the this parameter. */
18799 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18800 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18802 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18803 xops[0] = GEN_INT (vcall_offset);
18805 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18806 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18808 xops[1] = this_reg;
18810 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18812 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18815 /* If necessary, drop THIS back to its stack slot. */
18816 if (this_reg && this_reg != this)
18818 xops[0] = this_reg;
18820 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18823 xops[0] = XEXP (DECL_RTL (function), 0);
18826 if (!flag_pic || (*targetm.binds_local_p) (function))
18827 output_asm_insn ("jmp\t%P0", xops);
18830 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18831 tmp = gen_rtx_CONST (Pmode, tmp);
18832 tmp = gen_rtx_MEM (QImode, tmp);
18834 output_asm_insn ("jmp\t%A0", xops);
18839 if (!flag_pic || (*targetm.binds_local_p) (function))
18840 output_asm_insn ("jmp\t%P0", xops);
18845 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18846 tmp = (gen_rtx_SYMBOL_REF
18848 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18849 tmp = gen_rtx_MEM (QImode, tmp);
18851 output_asm_insn ("jmp\t%0", xops);
18854 #endif /* TARGET_MACHO */
18856 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18857 output_set_got (tmp, NULL_RTX);
18860 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18861 output_asm_insn ("jmp\t{*}%1", xops);
18867 x86_file_start (void)
18869 default_file_start ();
18871 darwin_file_start ();
18873 if (X86_FILE_START_VERSION_DIRECTIVE)
18874 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18875 if (X86_FILE_START_FLTUSED)
18876 fputs ("\t.global\t__fltused\n", asm_out_file);
18877 if (ix86_asm_dialect == ASM_INTEL)
18878 fputs ("\t.intel_syntax\n", asm_out_file);
18882 x86_field_alignment (tree field, int computed)
18884 enum machine_mode mode;
18885 tree type = TREE_TYPE (field);
18887 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18889 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18890 ? get_inner_array_type (type) : type);
18891 if (mode == DFmode || mode == DCmode
18892 || GET_MODE_CLASS (mode) == MODE_INT
18893 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18894 return MIN (32, computed);
18898 /* Output assembler code to FILE to increment profiler label # LABELNO
18899 for profiling a function entry. */
18901 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18906 #ifndef NO_PROFILE_COUNTERS
18907 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18909 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18913 #ifndef NO_PROFILE_COUNTERS
18914 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18916 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18920 #ifndef NO_PROFILE_COUNTERS
18921 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18922 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18924 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18928 #ifndef NO_PROFILE_COUNTERS
18929 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18930 PROFILE_COUNT_REGISTER);
18932 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18936 /* We don't have exact information about the insn sizes, but we may assume
18937 quite safely that we are informed about all 1 byte insns and memory
18938 address sizes. This is enough to eliminate unnecessary padding in
18942 min_insn_size (rtx insn)
18946 if (!INSN_P (insn) || !active_insn_p (insn))
18949 /* Discard alignments we've emit and jump instructions. */
18950 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18951 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18953 if (GET_CODE (insn) == JUMP_INSN
18954 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18955 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18958 /* Important case - calls are always 5 bytes.
18959 It is common to have many calls in the row. */
18960 if (GET_CODE (insn) == CALL_INSN
18961 && symbolic_reference_mentioned_p (PATTERN (insn))
18962 && !SIBLING_CALL_P (insn))
18964 if (get_attr_length (insn) <= 1)
18967 /* For normal instructions we may rely on the sizes of addresses
18968 and the presence of symbol to require 4 bytes of encoding.
18969 This is not the case for jumps where references are PC relative. */
18970 if (GET_CODE (insn) != JUMP_INSN)
18972 l = get_attr_length_address (insn);
18973 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18982 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18986 ix86_avoid_jump_misspredicts (void)
18988 rtx insn, start = get_insns ();
18989 int nbytes = 0, njumps = 0;
18992 /* Look for all minimal intervals of instructions containing 4 jumps.
18993 The intervals are bounded by START and INSN. NBYTES is the total
18994 size of instructions in the interval including INSN and not including
18995 START. When the NBYTES is smaller than 16 bytes, it is possible
18996 that the end of START and INSN ends up in the same 16byte page.
18998 The smallest offset in the page INSN can start is the case where START
18999 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19000 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19002 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19005 nbytes += min_insn_size (insn);
19007 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19008 INSN_UID (insn), min_insn_size (insn));
19009 if ((GET_CODE (insn) == JUMP_INSN
19010 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19011 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19012 || GET_CODE (insn) == CALL_INSN)
19019 start = NEXT_INSN (start);
19020 if ((GET_CODE (start) == JUMP_INSN
19021 && GET_CODE (PATTERN (start)) != ADDR_VEC
19022 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19023 || GET_CODE (start) == CALL_INSN)
19024 njumps--, isjump = 1;
19027 nbytes -= min_insn_size (start);
19029 gcc_assert (njumps >= 0);
19031 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19032 INSN_UID (start), INSN_UID (insn), nbytes);
19034 if (njumps == 3 && isjump && nbytes < 16)
19036 int padsize = 15 - nbytes + min_insn_size (insn);
19039 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19040 INSN_UID (insn), padsize);
19041 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19046 /* AMD Athlon works faster
19047 when RET is not destination of conditional jump or directly preceded
19048 by other jump instruction. We avoid the penalty by inserting NOP just
19049 before the RET instructions in such cases. */
19051 ix86_pad_returns (void)
19056 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19058 basic_block bb = e->src;
19059 rtx ret = BB_END (bb);
19061 bool replace = false;
19063 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
19064 || !maybe_hot_bb_p (bb))
19066 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19067 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
19069 if (prev && GET_CODE (prev) == CODE_LABEL)
19074 FOR_EACH_EDGE (e, ei, bb->preds)
19075 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19076 && !(e->flags & EDGE_FALLTHRU))
19081 prev = prev_active_insn (ret);
19083 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
19084 || GET_CODE (prev) == CALL_INSN))
19086 /* Empty functions get branch mispredict even when the jump destination
19087 is not visible to us. */
19088 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19093 emit_insn_before (gen_return_internal_long (), ret);
19099 /* Implement machine specific optimizations. We implement padding of returns
19100 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19104 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19105 ix86_pad_returns ();
19106 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19107 ix86_avoid_jump_misspredicts ();
19110 /* Return nonzero when QImode register that must be represented via REX prefix
19113 x86_extended_QIreg_mentioned_p (rtx insn)
19116 extract_insn_cached (insn);
19117 for (i = 0; i < recog_data.n_operands; i++)
19118 if (REG_P (recog_data.operand[i])
19119 && REGNO (recog_data.operand[i]) >= 4)
19124 /* Return nonzero when P points to register encoded via REX prefix.
19125 Called via for_each_rtx. */
19127 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19129 unsigned int regno;
19132 regno = REGNO (*p);
19133 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19136 /* Return true when INSN mentions register that must be encoded using REX
19139 x86_extended_reg_mentioned_p (rtx insn)
19141 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19144 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19145 optabs would emit if we didn't have TFmode patterns. */
19148 x86_emit_floatuns (rtx operands[2])
19150 rtx neglab, donelab, i0, i1, f0, in, out;
19151 enum machine_mode mode, inmode;
19153 inmode = GET_MODE (operands[1]);
19154 gcc_assert (inmode == SImode || inmode == DImode);
19157 in = force_reg (inmode, operands[1]);
19158 mode = GET_MODE (out);
19159 neglab = gen_label_rtx ();
19160 donelab = gen_label_rtx ();
19161 i1 = gen_reg_rtx (Pmode);
19162 f0 = gen_reg_rtx (mode);
19164 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
19166 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
19167 emit_jump_insn (gen_jump (donelab));
19170 emit_label (neglab);
19172 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
19173 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
19174 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19175 expand_float (f0, i0, 0);
19176 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19178 emit_label (donelab);
19181 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19182 with all elements equal to VAR. Return true if successful. */
19185 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19186 rtx target, rtx val)
19188 enum machine_mode smode, wsmode, wvmode;
19203 val = force_reg (GET_MODE_INNER (mode), val);
19204 x = gen_rtx_VEC_DUPLICATE (mode, val);
19205 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19211 if (TARGET_SSE || TARGET_3DNOW_A)
19213 val = gen_lowpart (SImode, val);
19214 x = gen_rtx_TRUNCATE (HImode, val);
19215 x = gen_rtx_VEC_DUPLICATE (mode, x);
19216 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19238 /* Extend HImode to SImode using a paradoxical SUBREG. */
19239 tmp1 = gen_reg_rtx (SImode);
19240 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19241 /* Insert the SImode value as low element of V4SImode vector. */
19242 tmp2 = gen_reg_rtx (V4SImode);
19243 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19244 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19245 CONST0_RTX (V4SImode),
19247 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19248 /* Cast the V4SImode vector back to a V8HImode vector. */
19249 tmp1 = gen_reg_rtx (V8HImode);
19250 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19251 /* Duplicate the low short through the whole low SImode word. */
19252 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19253 /* Cast the V8HImode vector back to a V4SImode vector. */
19254 tmp2 = gen_reg_rtx (V4SImode);
19255 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19256 /* Replicate the low element of the V4SImode vector. */
19257 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19258 /* Cast the V2SImode back to V8HImode, and store in target. */
19259 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19270 /* Extend QImode to SImode using a paradoxical SUBREG. */
19271 tmp1 = gen_reg_rtx (SImode);
19272 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19273 /* Insert the SImode value as low element of V4SImode vector. */
19274 tmp2 = gen_reg_rtx (V4SImode);
19275 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19276 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19277 CONST0_RTX (V4SImode),
19279 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19280 /* Cast the V4SImode vector back to a V16QImode vector. */
19281 tmp1 = gen_reg_rtx (V16QImode);
19282 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19283 /* Duplicate the low byte through the whole low SImode word. */
19284 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19285 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19286 /* Cast the V16QImode vector back to a V4SImode vector. */
19287 tmp2 = gen_reg_rtx (V4SImode);
19288 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19289 /* Replicate the low element of the V4SImode vector. */
19290 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19291 /* Cast the V2SImode back to V16QImode, and store in target. */
19292 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
19300 /* Replicate the value once into the next wider mode and recurse. */
19301 val = convert_modes (wsmode, smode, val, true);
19302 x = expand_simple_binop (wsmode, ASHIFT, val,
19303 GEN_INT (GET_MODE_BITSIZE (smode)),
19304 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19305 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
19307 x = gen_reg_rtx (wvmode);
19308 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
19309 gcc_unreachable ();
19310 emit_move_insn (target, gen_lowpart (mode, x));
19318 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19319 whose ONE_VAR element is VAR, and other elements are zero. Return true
19323 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
19324 rtx target, rtx var, int one_var)
19326 enum machine_mode vsimode;
19342 var = force_reg (GET_MODE_INNER (mode), var);
19343 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
19344 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19349 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
19350 new_target = gen_reg_rtx (mode);
19352 new_target = target;
19353 var = force_reg (GET_MODE_INNER (mode), var);
19354 x = gen_rtx_VEC_DUPLICATE (mode, var);
19355 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
19356 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
19359 /* We need to shuffle the value to the correct position, so
19360 create a new pseudo to store the intermediate result. */
19362 /* With SSE2, we can use the integer shuffle insns. */
19363 if (mode != V4SFmode && TARGET_SSE2)
19365 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
19367 GEN_INT (one_var == 1 ? 0 : 1),
19368 GEN_INT (one_var == 2 ? 0 : 1),
19369 GEN_INT (one_var == 3 ? 0 : 1)));
19370 if (target != new_target)
19371 emit_move_insn (target, new_target);
19375 /* Otherwise convert the intermediate result to V4SFmode and
19376 use the SSE1 shuffle instructions. */
19377 if (mode != V4SFmode)
19379 tmp = gen_reg_rtx (V4SFmode);
19380 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
19385 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
19387 GEN_INT (one_var == 1 ? 0 : 1),
19388 GEN_INT (one_var == 2 ? 0+4 : 1+4),
19389 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
19391 if (mode != V4SFmode)
19392 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
19393 else if (tmp != target)
19394 emit_move_insn (target, tmp);
19396 else if (target != new_target)
19397 emit_move_insn (target, new_target);
19402 vsimode = V4SImode;
19408 vsimode = V2SImode;
19414 /* Zero extend the variable element to SImode and recurse. */
19415 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19417 x = gen_reg_rtx (vsimode);
19418 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19420 gcc_unreachable ();
19422 emit_move_insn (target, gen_lowpart (mode, x));
19430 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19431 consisting of the values in VALS. It is known that all elements
19432 except ONE_VAR are constants. Return true if successful. */
19435 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19436 rtx target, rtx vals, int one_var)
19438 rtx var = XVECEXP (vals, 0, one_var);
19439 enum machine_mode wmode;
19442 const_vec = copy_rtx (vals);
19443 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19444 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19452 /* For the two element vectors, it's just as easy to use
19453 the general case. */
19469 /* There's no way to set one QImode entry easily. Combine
19470 the variable value with its adjacent constant value, and
19471 promote to an HImode set. */
19472 x = XVECEXP (vals, 0, one_var ^ 1);
19475 var = convert_modes (HImode, QImode, var, true);
19476 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19477 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19478 x = GEN_INT (INTVAL (x) & 0xff);
19482 var = convert_modes (HImode, QImode, var, true);
19483 x = gen_int_mode (INTVAL (x) << 8, HImode);
19485 if (x != const0_rtx)
19486 var = expand_simple_binop (HImode, IOR, var, x, var,
19487 1, OPTAB_LIB_WIDEN);
19489 x = gen_reg_rtx (wmode);
19490 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19491 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19493 emit_move_insn (target, gen_lowpart (mode, x));
19500 emit_move_insn (target, const_vec);
19501 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19505 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19506 all values variable, and none identical. */
19509 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19510 rtx target, rtx vals)
19512 enum machine_mode half_mode = GET_MODE_INNER (mode);
19513 rtx op0 = NULL, op1 = NULL;
19514 bool use_vec_concat = false;
19520 if (!mmx_ok && !TARGET_SSE)
19526 /* For the two element vectors, we always implement VEC_CONCAT. */
19527 op0 = XVECEXP (vals, 0, 0);
19528 op1 = XVECEXP (vals, 0, 1);
19529 use_vec_concat = true;
19533 half_mode = V2SFmode;
19536 half_mode = V2SImode;
19542 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19543 Recurse to load the two halves. */
19545 op0 = gen_reg_rtx (half_mode);
19546 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19547 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19549 op1 = gen_reg_rtx (half_mode);
19550 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19551 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19553 use_vec_concat = true;
19564 gcc_unreachable ();
19567 if (use_vec_concat)
19569 if (!register_operand (op0, half_mode))
19570 op0 = force_reg (half_mode, op0);
19571 if (!register_operand (op1, half_mode))
19572 op1 = force_reg (half_mode, op1);
19574 emit_insn (gen_rtx_SET (VOIDmode, target,
19575 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19579 int i, j, n_elts, n_words, n_elt_per_word;
19580 enum machine_mode inner_mode;
19581 rtx words[4], shift;
19583 inner_mode = GET_MODE_INNER (mode);
19584 n_elts = GET_MODE_NUNITS (mode);
19585 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19586 n_elt_per_word = n_elts / n_words;
19587 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19589 for (i = 0; i < n_words; ++i)
19591 rtx word = NULL_RTX;
19593 for (j = 0; j < n_elt_per_word; ++j)
19595 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19596 elt = convert_modes (word_mode, inner_mode, elt, true);
19602 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19603 word, 1, OPTAB_LIB_WIDEN);
19604 word = expand_simple_binop (word_mode, IOR, word, elt,
19605 word, 1, OPTAB_LIB_WIDEN);
19613 emit_move_insn (target, gen_lowpart (mode, words[0]));
19614 else if (n_words == 2)
19616 rtx tmp = gen_reg_rtx (mode);
19617 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19618 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19619 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19620 emit_move_insn (target, tmp);
19622 else if (n_words == 4)
19624 rtx tmp = gen_reg_rtx (V4SImode);
19625 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19626 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19627 emit_move_insn (target, gen_lowpart (mode, tmp));
19630 gcc_unreachable ();
19634 /* Initialize vector TARGET via VALS. Suppress the use of MMX
19635 instructions unless MMX_OK is true. */
19638 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19640 enum machine_mode mode = GET_MODE (target);
19641 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19642 int n_elts = GET_MODE_NUNITS (mode);
19643 int n_var = 0, one_var = -1;
19644 bool all_same = true, all_const_zero = true;
19648 for (i = 0; i < n_elts; ++i)
19650 x = XVECEXP (vals, 0, i);
19651 if (!CONSTANT_P (x))
19652 n_var++, one_var = i;
19653 else if (x != CONST0_RTX (inner_mode))
19654 all_const_zero = false;
19655 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19659 /* Constants are best loaded from the constant pool. */
19662 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19666 /* If all values are identical, broadcast the value. */
19668 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19669 XVECEXP (vals, 0, 0)))
19672 /* Values where only one field is non-constant are best loaded from
19673 the pool and overwritten via move later. */
19677 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19678 XVECEXP (vals, 0, one_var),
19682 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19686 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19690 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19692 enum machine_mode mode = GET_MODE (target);
19693 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19694 bool use_vec_merge = false;
19703 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19704 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19706 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19708 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19709 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19719 /* For the two element vectors, we implement a VEC_CONCAT with
19720 the extraction of the other element. */
19722 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19723 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19726 op0 = val, op1 = tmp;
19728 op0 = tmp, op1 = val;
19730 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19731 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19739 use_vec_merge = true;
19743 /* tmp = target = A B C D */
19744 tmp = copy_to_reg (target);
19745 /* target = A A B B */
19746 emit_insn (gen_sse_unpcklps (target, target, target));
19747 /* target = X A B B */
19748 ix86_expand_vector_set (false, target, val, 0);
19749 /* target = A X C D */
19750 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19751 GEN_INT (1), GEN_INT (0),
19752 GEN_INT (2+4), GEN_INT (3+4)));
19756 /* tmp = target = A B C D */
19757 tmp = copy_to_reg (target);
19758 /* tmp = X B C D */
19759 ix86_expand_vector_set (false, tmp, val, 0);
19760 /* target = A B X D */
19761 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19762 GEN_INT (0), GEN_INT (1),
19763 GEN_INT (0+4), GEN_INT (3+4)));
19767 /* tmp = target = A B C D */
19768 tmp = copy_to_reg (target);
19769 /* tmp = X B C D */
19770 ix86_expand_vector_set (false, tmp, val, 0);
19771 /* target = A B X D */
19772 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19773 GEN_INT (0), GEN_INT (1),
19774 GEN_INT (2+4), GEN_INT (0+4)));
19778 gcc_unreachable ();
19783 /* Element 0 handled by vec_merge below. */
19786 use_vec_merge = true;
19792 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19793 store into element 0, then shuffle them back. */
19797 order[0] = GEN_INT (elt);
19798 order[1] = const1_rtx;
19799 order[2] = const2_rtx;
19800 order[3] = GEN_INT (3);
19801 order[elt] = const0_rtx;
19803 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19804 order[1], order[2], order[3]));
19806 ix86_expand_vector_set (false, target, val, 0);
19808 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19809 order[1], order[2], order[3]));
19813 /* For SSE1, we have to reuse the V4SF code. */
19814 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19815 gen_lowpart (SFmode, val), elt);
19820 use_vec_merge = TARGET_SSE2;
19823 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19834 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19835 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19836 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19840 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19842 emit_move_insn (mem, target);
19844 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19845 emit_move_insn (tmp, val);
19847 emit_move_insn (target, mem);
19852 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19854 enum machine_mode mode = GET_MODE (vec);
19855 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19856 bool use_vec_extr = false;
19869 use_vec_extr = true;
19881 tmp = gen_reg_rtx (mode);
19882 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19883 GEN_INT (elt), GEN_INT (elt),
19884 GEN_INT (elt+4), GEN_INT (elt+4)));
19888 tmp = gen_reg_rtx (mode);
19889 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19893 gcc_unreachable ();
19896 use_vec_extr = true;
19911 tmp = gen_reg_rtx (mode);
19912 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19913 GEN_INT (elt), GEN_INT (elt),
19914 GEN_INT (elt), GEN_INT (elt)));
19918 tmp = gen_reg_rtx (mode);
19919 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19923 gcc_unreachable ();
19926 use_vec_extr = true;
19931 /* For SSE1, we have to reuse the V4SF code. */
19932 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19933 gen_lowpart (V4SFmode, vec), elt);
19939 use_vec_extr = TARGET_SSE2;
19942 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19947 /* ??? Could extract the appropriate HImode element and shift. */
19954 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19955 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19957 /* Let the rtl optimizers know about the zero extension performed. */
19958 if (inner_mode == HImode)
19960 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19961 target = gen_lowpart (SImode, target);
19964 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19968 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19970 emit_move_insn (mem, vec);
19972 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19973 emit_move_insn (target, tmp);
19977 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19978 pattern to reduce; DEST is the destination; IN is the input vector. */
19981 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19983 rtx tmp1, tmp2, tmp3;
19985 tmp1 = gen_reg_rtx (V4SFmode);
19986 tmp2 = gen_reg_rtx (V4SFmode);
19987 tmp3 = gen_reg_rtx (V4SFmode);
19989 emit_insn (gen_sse_movhlps (tmp1, in, in));
19990 emit_insn (fn (tmp2, tmp1, in));
19992 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19993 GEN_INT (1), GEN_INT (1),
19994 GEN_INT (1+4), GEN_INT (1+4)));
19995 emit_insn (fn (dest, tmp2, tmp3));
19998 /* Target hook for scalar_mode_supported_p. */
20000 ix86_scalar_mode_supported_p (enum machine_mode mode)
20002 if (DECIMAL_FLOAT_MODE_P (mode))
20005 return default_scalar_mode_supported_p (mode);
20008 /* Implements target hook vector_mode_supported_p. */
20010 ix86_vector_mode_supported_p (enum machine_mode mode)
20012 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20014 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20016 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20018 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20023 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20025 We do this in the new i386 backend to maintain source compatibility
20026 with the old cc0-based compiler. */
20029 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20030 tree inputs ATTRIBUTE_UNUSED,
20033 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20035 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20037 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
20042 /* Return true if this goes in small data/bss. */
20045 ix86_in_large_data_p (tree exp)
20047 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20050 /* Functions are never large data. */
20051 if (TREE_CODE (exp) == FUNCTION_DECL)
20054 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20056 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20057 if (strcmp (section, ".ldata") == 0
20058 || strcmp (section, ".lbss") == 0)
20064 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20066 /* If this is an incomplete type with size 0, then we can't put it
20067 in data because it might be too big when completed. */
20068 if (!size || size > ix86_section_threshold)
20075 ix86_encode_section_info (tree decl, rtx rtl, int first)
20077 default_encode_section_info (decl, rtl, first);
20079 if (TREE_CODE (decl) == VAR_DECL
20080 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20081 && ix86_in_large_data_p (decl))
20082 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20085 /* Worker function for REVERSE_CONDITION. */
20088 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20090 return (mode != CCFPmode && mode != CCFPUmode
20091 ? reverse_condition (code)
20092 : reverse_condition_maybe_unordered (code));
20095 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20099 output_387_reg_move (rtx insn, rtx *operands)
20101 if (REG_P (operands[1])
20102 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20104 if (REGNO (operands[0]) == FIRST_STACK_REG)
20105 return output_387_ffreep (operands, 0);
20106 return "fstp\t%y0";
20108 if (STACK_TOP_P (operands[0]))
20109 return "fld%z1\t%y1";
20113 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20114 FP status register is set. */
20117 ix86_emit_fp_unordered_jump (rtx label)
20119 rtx reg = gen_reg_rtx (HImode);
20122 emit_insn (gen_x86_fnstsw_1 (reg));
20124 if (TARGET_USE_SAHF)
20126 emit_insn (gen_x86_sahf_1 (reg));
20128 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20129 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20133 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20135 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20136 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20139 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20140 gen_rtx_LABEL_REF (VOIDmode, label),
20142 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20143 emit_jump_insn (temp);
20146 /* Output code to perform a log1p XFmode calculation. */
20148 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20150 rtx label1 = gen_label_rtx ();
20151 rtx label2 = gen_label_rtx ();
20153 rtx tmp = gen_reg_rtx (XFmode);
20154 rtx tmp2 = gen_reg_rtx (XFmode);
20156 emit_insn (gen_absxf2 (tmp, op1));
20157 emit_insn (gen_cmpxf (tmp,
20158 CONST_DOUBLE_FROM_REAL_VALUE (
20159 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20161 emit_jump_insn (gen_bge (label1));
20163 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20164 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
20165 emit_jump (label2);
20167 emit_label (label1);
20168 emit_move_insn (tmp, CONST1_RTX (XFmode));
20169 emit_insn (gen_addxf3 (tmp, op1, tmp));
20170 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20171 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
20173 emit_label (label2);
20176 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20179 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20182 /* With Binutils 2.15, the "@unwind" marker must be specified on
20183 every occurrence of the ".eh_frame" section, not just the first
20186 && strcmp (name, ".eh_frame") == 0)
20188 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20189 flags & SECTION_WRITE ? "aw" : "a");
20192 default_elf_asm_named_section (name, flags, decl);
20195 /* Return the mangling of TYPE if it is an extended fundamental type. */
20197 static const char *
20198 ix86_mangle_fundamental_type (tree type)
20200 switch (TYPE_MODE (type))
20203 /* __float128 is "g". */
20206 /* "long double" or __float80 is "e". */
20213 /* For 32-bit code we can save PIC register setup by using
20214 __stack_chk_fail_local hidden function instead of calling
20215 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20216 register, so it is better to call __stack_chk_fail directly. */
20219 ix86_stack_protect_fail (void)
20221 return TARGET_64BIT
20222 ? default_external_stack_protect_fail ()
20223 : default_hidden_stack_protect_fail ();
20226 /* Select a format to encode pointers in exception handling data. CODE
20227 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20228 true if the symbol may be affected by dynamic relocations.
20230 ??? All x86 object file formats are capable of representing this.
20231 After all, the relocation needed is the same as for the call insn.
20232 Whether or not a particular assembler allows us to enter such, I
20233 guess we'll have to see. */
20235 asm_preferred_eh_data_format (int code, int global)
20239 int type = DW_EH_PE_sdata8;
20241 || ix86_cmodel == CM_SMALL_PIC
20242 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20243 type = DW_EH_PE_sdata4;
20244 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20246 if (ix86_cmodel == CM_SMALL
20247 || (ix86_cmodel == CM_MEDIUM && code))
20248 return DW_EH_PE_udata4;
20249 return DW_EH_PE_absptr;
20252 /* Expand copysign from SIGN to the positive value ABS_VALUE
20253 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20256 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20258 enum machine_mode mode = GET_MODE (sign);
20259 rtx sgn = gen_reg_rtx (mode);
20260 if (mask == NULL_RTX)
20262 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20263 if (!VECTOR_MODE_P (mode))
20265 /* We need to generate a scalar mode mask in this case. */
20266 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20267 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20268 mask = gen_reg_rtx (mode);
20269 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20273 mask = gen_rtx_NOT (mode, mask);
20274 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20275 gen_rtx_AND (mode, mask, sign)));
20276 emit_insn (gen_rtx_SET (VOIDmode, result,
20277 gen_rtx_IOR (mode, abs_value, sgn)));
20280 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20281 mask for masking out the sign-bit is stored in *SMASK, if that is
20284 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20286 enum machine_mode mode = GET_MODE (op0);
20289 xa = gen_reg_rtx (mode);
20290 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20291 if (!VECTOR_MODE_P (mode))
20293 /* We need to generate a scalar mode mask in this case. */
20294 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20295 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20296 mask = gen_reg_rtx (mode);
20297 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20299 emit_insn (gen_rtx_SET (VOIDmode, xa,
20300 gen_rtx_AND (mode, op0, mask)));
20308 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20309 swapping the operands if SWAP_OPERANDS is true. The expanded
20310 code is a forward jump to a newly created label in case the
20311 comparison is true. The generated label rtx is returned. */
20313 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
20314 bool swap_operands)
20325 label = gen_label_rtx ();
20326 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
20327 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20328 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
20329 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
20330 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20331 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
20332 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20333 JUMP_LABEL (tmp) = label;
20338 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
20339 using comparison code CODE. Operands are swapped for the comparison if
20340 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
20342 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
20343 bool swap_operands)
20345 enum machine_mode mode = GET_MODE (op0);
20346 rtx mask = gen_reg_rtx (mode);
20355 if (mode == DFmode)
20356 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
20357 gen_rtx_fmt_ee (code, mode, op0, op1)));
20359 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
20360 gen_rtx_fmt_ee (code, mode, op0, op1)));
20365 /* Generate and return a rtx of mode MODE for 2**n where n is the number
20366 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
20368 ix86_gen_TWO52 (enum machine_mode mode)
20370 REAL_VALUE_TYPE TWO52r;
20373 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
20374 TWO52 = const_double_from_real_value (TWO52r, mode);
20375 TWO52 = force_reg (mode, TWO52);
20380 /* Expand SSE sequence for computing lround from OP1 storing
20383 ix86_expand_lround (rtx op0, rtx op1)
20385 /* C code for the stuff we're doing below:
20386 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
20389 enum machine_mode mode = GET_MODE (op1);
20390 const struct real_format *fmt;
20391 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20394 /* load nextafter (0.5, 0.0) */
20395 fmt = REAL_MODE_FORMAT (mode);
20396 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20397 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
20399 /* adj = copysign (0.5, op1) */
20400 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
20401 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
20403 /* adj = op1 + adj */
20404 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
20406 /* op0 = (imode)adj */
20407 expand_fix (op0, adj, 0);
20410 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
20413 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
20415 /* C code for the stuff we're doing below (for do_floor):
20417 xi -= (double)xi > op1 ? 1 : 0;
20420 enum machine_mode fmode = GET_MODE (op1);
20421 enum machine_mode imode = GET_MODE (op0);
20422 rtx ireg, freg, label, tmp;
20424 /* reg = (long)op1 */
20425 ireg = gen_reg_rtx (imode);
20426 expand_fix (ireg, op1, 0);
20428 /* freg = (double)reg */
20429 freg = gen_reg_rtx (fmode);
20430 expand_float (freg, ireg, 0);
20432 /* ireg = (freg > op1) ? ireg - 1 : ireg */
20433 label = ix86_expand_sse_compare_and_jump (UNLE,
20434 freg, op1, !do_floor);
20435 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
20436 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
20437 emit_move_insn (ireg, tmp);
20439 emit_label (label);
20440 LABEL_NUSES (label) = 1;
20442 emit_move_insn (op0, ireg);
20445 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
20446 result in OPERAND0. */
20448 ix86_expand_rint (rtx operand0, rtx operand1)
20450 /* C code for the stuff we're doing below:
20451 xa = fabs (operand1);
20452 if (!isless (xa, 2**52))
20454 xa = xa + 2**52 - 2**52;
20455 return copysign (xa, operand1);
20457 enum machine_mode mode = GET_MODE (operand0);
20458 rtx res, xa, label, TWO52, mask;
20460 res = gen_reg_rtx (mode);
20461 emit_move_insn (res, operand1);
20463 /* xa = abs (operand1) */
20464 xa = ix86_expand_sse_fabs (res, &mask);
20466 /* if (!isless (xa, TWO52)) goto label; */
20467 TWO52 = ix86_gen_TWO52 (mode);
20468 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20470 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20471 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
20473 ix86_sse_copysign_to_positive (res, xa, res, mask);
20475 emit_label (label);
20476 LABEL_NUSES (label) = 1;
20478 emit_move_insn (operand0, res);
20481 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20484 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
20486 /* C code for the stuff we expand below.
20487 double xa = fabs (x), x2;
20488 if (!isless (xa, TWO52))
20490 xa = xa + TWO52 - TWO52;
20491 x2 = copysign (xa, x);
20500 enum machine_mode mode = GET_MODE (operand0);
20501 rtx xa, TWO52, tmp, label, one, res, mask;
20503 TWO52 = ix86_gen_TWO52 (mode);
20505 /* Temporary for holding the result, initialized to the input
20506 operand to ease control flow. */
20507 res = gen_reg_rtx (mode);
20508 emit_move_insn (res, operand1);
20510 /* xa = abs (operand1) */
20511 xa = ix86_expand_sse_fabs (res, &mask);
20513 /* if (!isless (xa, TWO52)) goto label; */
20514 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20516 /* xa = xa + TWO52 - TWO52; */
20517 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20518 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
20520 /* xa = copysign (xa, operand1) */
20521 ix86_sse_copysign_to_positive (xa, xa, res, mask);
20523 /* generate 1.0 or -1.0 */
20524 one = force_reg (mode,
20525 const_double_from_real_value (do_floor
20526 ? dconst1 : dconstm1, mode));
20528 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20529 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
20530 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20531 gen_rtx_AND (mode, one, tmp)));
20532 /* We always need to subtract here to preserve signed zero. */
20533 tmp = expand_simple_binop (mode, MINUS,
20534 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20535 emit_move_insn (res, tmp);
20537 emit_label (label);
20538 LABEL_NUSES (label) = 1;
20540 emit_move_insn (operand0, res);
20543 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20546 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
20548 /* C code for the stuff we expand below.
20549 double xa = fabs (x), x2;
20550 if (!isless (xa, TWO52))
20552 x2 = (double)(long)x;
20559 if (HONOR_SIGNED_ZEROS (mode))
20560 return copysign (x2, x);
20563 enum machine_mode mode = GET_MODE (operand0);
20564 rtx xa, xi, TWO52, tmp, label, one, res, mask;
20566 TWO52 = ix86_gen_TWO52 (mode);
20568 /* Temporary for holding the result, initialized to the input
20569 operand to ease control flow. */
20570 res = gen_reg_rtx (mode);
20571 emit_move_insn (res, operand1);
20573 /* xa = abs (operand1) */
20574 xa = ix86_expand_sse_fabs (res, &mask);
20576 /* if (!isless (xa, TWO52)) goto label; */
20577 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20579 /* xa = (double)(long)x */
20580 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20581 expand_fix (xi, res, 0);
20582 expand_float (xa, xi, 0);
20585 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
20587 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20588 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
20589 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20590 gen_rtx_AND (mode, one, tmp)));
20591 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
20592 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20593 emit_move_insn (res, tmp);
20595 if (HONOR_SIGNED_ZEROS (mode))
20596 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
20598 emit_label (label);
20599 LABEL_NUSES (label) = 1;
20601 emit_move_insn (operand0, res);
20604 /* Expand SSE sequence for computing round from OPERAND1 storing
20605 into OPERAND0. Sequence that works without relying on DImode truncation
20606 via cvttsd2siq that is only available on 64bit targets. */
20608 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
20610 /* C code for the stuff we expand below.
20611 double xa = fabs (x), xa2, x2;
20612 if (!isless (xa, TWO52))
20614 Using the absolute value and copying back sign makes
20615 -0.0 -> -0.0 correct.
20616 xa2 = xa + TWO52 - TWO52;
20621 else if (dxa > 0.5)
20623 x2 = copysign (xa2, x);
20626 enum machine_mode mode = GET_MODE (operand0);
20627 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
20629 TWO52 = ix86_gen_TWO52 (mode);
20631 /* Temporary for holding the result, initialized to the input
20632 operand to ease control flow. */
20633 res = gen_reg_rtx (mode);
20634 emit_move_insn (res, operand1);
20636 /* xa = abs (operand1) */
20637 xa = ix86_expand_sse_fabs (res, &mask);
20639 /* if (!isless (xa, TWO52)) goto label; */
20640 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20642 /* xa2 = xa + TWO52 - TWO52; */
20643 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20644 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
20646 /* dxa = xa2 - xa; */
20647 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
20649 /* generate 0.5, 1.0 and -0.5 */
20650 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
20651 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
20652 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
20656 tmp = gen_reg_rtx (mode);
20657 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
20658 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
20659 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20660 gen_rtx_AND (mode, one, tmp)));
20661 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20662 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
20663 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
20664 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20665 gen_rtx_AND (mode, one, tmp)));
20666 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20668 /* res = copysign (xa2, operand1) */
20669 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
20671 emit_label (label);
20672 LABEL_NUSES (label) = 1;
20674 emit_move_insn (operand0, res);
20677 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20680 ix86_expand_trunc (rtx operand0, rtx operand1)
20682 /* C code for SSE variant we expand below.
20683 double xa = fabs (x), x2;
20684 if (!isless (xa, TWO52))
20686 x2 = (double)(long)x;
20687 if (HONOR_SIGNED_ZEROS (mode))
20688 return copysign (x2, x);
20691 enum machine_mode mode = GET_MODE (operand0);
20692 rtx xa, xi, TWO52, label, res, mask;
20694 TWO52 = ix86_gen_TWO52 (mode);
20696 /* Temporary for holding the result, initialized to the input
20697 operand to ease control flow. */
20698 res = gen_reg_rtx (mode);
20699 emit_move_insn (res, operand1);
20701 /* xa = abs (operand1) */
20702 xa = ix86_expand_sse_fabs (res, &mask);
20704 /* if (!isless (xa, TWO52)) goto label; */
20705 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20707 /* x = (double)(long)x */
20708 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20709 expand_fix (xi, res, 0);
20710 expand_float (res, xi, 0);
20712 if (HONOR_SIGNED_ZEROS (mode))
20713 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
20715 emit_label (label);
20716 LABEL_NUSES (label) = 1;
20718 emit_move_insn (operand0, res);
20721 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20724 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
20726 enum machine_mode mode = GET_MODE (operand0);
20727 rtx xa, mask, TWO52, label, one, res, smask, tmp;
20729 /* C code for SSE variant we expand below.
20730 double xa = fabs (x), x2;
20731 if (!isless (xa, TWO52))
20733 xa2 = xa + TWO52 - TWO52;
20737 x2 = copysign (xa2, x);
20741 TWO52 = ix86_gen_TWO52 (mode);
20743 /* Temporary for holding the result, initialized to the input
20744 operand to ease control flow. */
20745 res = gen_reg_rtx (mode);
20746 emit_move_insn (res, operand1);
20748 /* xa = abs (operand1) */
20749 xa = ix86_expand_sse_fabs (res, &smask);
20751 /* if (!isless (xa, TWO52)) goto label; */
20752 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20754 /* res = xa + TWO52 - TWO52; */
20755 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20756 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
20757 emit_move_insn (res, tmp);
20760 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
20762 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
20763 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
20764 emit_insn (gen_rtx_SET (VOIDmode, mask,
20765 gen_rtx_AND (mode, mask, one)));
20766 tmp = expand_simple_binop (mode, MINUS,
20767 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
20768 emit_move_insn (res, tmp);
20770 /* res = copysign (res, operand1) */
20771 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
20773 emit_label (label);
20774 LABEL_NUSES (label) = 1;
20776 emit_move_insn (operand0, res);
20779 /* Expand SSE sequence for computing round from OPERAND1 storing
20782 ix86_expand_round (rtx operand0, rtx operand1)
20784 /* C code for the stuff we're doing below:
20785 double xa = fabs (x);
20786 if (!isless (xa, TWO52))
20788 xa = (double)(long)(xa + nextafter (0.5, 0.0));
20789 return copysign (xa, x);
20791 enum machine_mode mode = GET_MODE (operand0);
20792 rtx res, TWO52, xa, label, xi, half, mask;
20793 const struct real_format *fmt;
20794 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20796 /* Temporary for holding the result, initialized to the input
20797 operand to ease control flow. */
20798 res = gen_reg_rtx (mode);
20799 emit_move_insn (res, operand1);
20801 TWO52 = ix86_gen_TWO52 (mode);
20802 xa = ix86_expand_sse_fabs (res, &mask);
20803 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20805 /* load nextafter (0.5, 0.0) */
20806 fmt = REAL_MODE_FORMAT (mode);
20807 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20808 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
20810 /* xa = xa + 0.5 */
20811 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
20812 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
20814 /* xa = (double)(int64_t)xa */
20815 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20816 expand_fix (xi, xa, 0);
20817 expand_float (xa, xi, 0);
20819 /* res = copysign (xa, operand1) */
20820 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
20822 emit_label (label);
20823 LABEL_NUSES (label) = 1;
20825 emit_move_insn (operand0, res);
20828 #include "gt-i386.h"