1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
797 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
799 static rtx get_thread_pointer (int);
800 static rtx legitimize_tls_address (rtx, enum tls_model, int);
801 static void get_pc_thunk_name (char [32], unsigned int);
802 static rtx gen_push (rtx);
803 static int memory_address_length (rtx addr);
804 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
805 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
806 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
807 static void ix86_dump_ppro_packet (FILE *);
808 static void ix86_reorder_insn (rtx *, rtx *);
809 static struct machine_function * ix86_init_machine_status (void);
810 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
811 static int ix86_nsaved_regs (void);
812 static void ix86_emit_save_regs (void);
813 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
814 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
815 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
816 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
817 static void ix86_sched_reorder_ppro (rtx *, rtx *);
818 static HOST_WIDE_INT ix86_GOT_alias_set (void);
819 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
820 static rtx ix86_expand_aligntest (rtx, int);
821 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
822 static int ix86_issue_rate (void);
823 static int ix86_adjust_cost (rtx, rtx, rtx, int);
824 static void ix86_sched_init (FILE *, int, int);
825 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
826 static int ix86_variable_issue (FILE *, int, rtx, int);
827 static int ia32_use_dfa_pipeline_interface (void);
828 static int ia32_multipass_dfa_lookahead (void);
829 static void ix86_init_mmx_sse_builtins (void);
830 static rtx x86_this_parameter (tree);
831 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
832 HOST_WIDE_INT, tree);
833 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
834 static void x86_file_start (void);
835 static void ix86_reorg (void);
836 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
837 static tree ix86_build_builtin_va_list (void);
841 rtx base, index, disp;
843 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
846 static int ix86_decompose_address (rtx, struct ix86_address *);
847 static int ix86_address_cost (rtx);
848 static bool ix86_cannot_force_const_mem (rtx);
849 static rtx ix86_delegitimize_address (rtx);
851 struct builtin_description;
852 static rtx ix86_expand_sse_comi (const struct builtin_description *,
854 static rtx ix86_expand_sse_compare (const struct builtin_description *,
856 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
857 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
858 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
859 static rtx ix86_expand_store_builtin (enum insn_code, tree);
860 static rtx safe_vector_operand (rtx, enum machine_mode);
861 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
862 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
863 enum rtx_code *, enum rtx_code *);
864 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
865 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
866 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
867 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
868 static int ix86_fp_comparison_cost (enum rtx_code code);
869 static unsigned int ix86_select_alt_pic_regnum (void);
870 static int ix86_save_reg (unsigned int, int);
871 static void ix86_compute_frame_layout (struct ix86_frame *);
872 static int ix86_comp_type_attributes (tree, tree);
873 static int ix86_function_regparm (tree, tree);
874 const struct attribute_spec ix86_attribute_table[];
875 static bool ix86_function_ok_for_sibcall (tree, tree);
876 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
877 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
878 static int ix86_value_regno (enum machine_mode);
879 static bool contains_128bit_aligned_vector_p (tree);
880 static bool ix86_ms_bitfield_layout_p (tree);
881 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
882 static int extended_reg_mentioned_1 (rtx *, void *);
883 static bool ix86_rtx_costs (rtx, int, int, int *);
884 static int min_insn_size (rtx);
885 static void k8_avoid_jump_misspredicts (void);
887 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
888 static void ix86_svr3_asm_out_constructor (rtx, int);
891 /* Register class used for passing given 64bit part of the argument.
892 These represent classes as documented by the PS ABI, with the exception
893 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
894 use SF or DFmode move instead of DImode to avoid reformatting penalties.
896 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
897 whenever possible (upper half does contain padding).
899 enum x86_64_reg_class
902 X86_64_INTEGER_CLASS,
903 X86_64_INTEGERSI_CLASS,
912 static const char * const x86_64_reg_class_name[] =
913 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
915 #define MAX_CLASSES 4
916 static int classify_argument (enum machine_mode, tree,
917 enum x86_64_reg_class [MAX_CLASSES], int);
918 static int examine_argument (enum machine_mode, tree, int, int *, int *);
919 static rtx construct_container (enum machine_mode, tree, int, int, int,
921 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
922 enum x86_64_reg_class);
924 /* Table of constants used by fldpi, fldln2, etc.... */
925 static REAL_VALUE_TYPE ext_80387_constants_table [5];
926 static bool ext_80387_constants_init = 0;
927 static void init_ext_80387_constants (void);
929 /* Initialize the GCC target structure. */
930 #undef TARGET_ATTRIBUTE_TABLE
931 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
932 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
933 # undef TARGET_MERGE_DECL_ATTRIBUTES
934 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
937 #undef TARGET_COMP_TYPE_ATTRIBUTES
938 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
940 #undef TARGET_INIT_BUILTINS
941 #define TARGET_INIT_BUILTINS ix86_init_builtins
943 #undef TARGET_EXPAND_BUILTIN
944 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
946 #undef TARGET_ASM_FUNCTION_EPILOGUE
947 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
949 #undef TARGET_ASM_OPEN_PAREN
950 #define TARGET_ASM_OPEN_PAREN ""
951 #undef TARGET_ASM_CLOSE_PAREN
952 #define TARGET_ASM_CLOSE_PAREN ""
954 #undef TARGET_ASM_ALIGNED_HI_OP
955 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
956 #undef TARGET_ASM_ALIGNED_SI_OP
957 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
959 #undef TARGET_ASM_ALIGNED_DI_OP
960 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
963 #undef TARGET_ASM_UNALIGNED_HI_OP
964 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
965 #undef TARGET_ASM_UNALIGNED_SI_OP
966 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
967 #undef TARGET_ASM_UNALIGNED_DI_OP
968 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
970 #undef TARGET_SCHED_ADJUST_COST
971 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
972 #undef TARGET_SCHED_ISSUE_RATE
973 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
974 #undef TARGET_SCHED_VARIABLE_ISSUE
975 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
976 #undef TARGET_SCHED_INIT
977 #define TARGET_SCHED_INIT ix86_sched_init
978 #undef TARGET_SCHED_REORDER
979 #define TARGET_SCHED_REORDER ix86_sched_reorder
980 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
981 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
982 ia32_use_dfa_pipeline_interface
983 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
984 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
985 ia32_multipass_dfa_lookahead
987 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
988 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
991 #undef TARGET_HAVE_TLS
992 #define TARGET_HAVE_TLS true
994 #undef TARGET_CANNOT_FORCE_CONST_MEM
995 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
997 #undef TARGET_DELEGITIMIZE_ADDRESS
998 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1000 #undef TARGET_MS_BITFIELD_LAYOUT_P
1001 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1003 #undef TARGET_ASM_OUTPUT_MI_THUNK
1004 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1005 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1006 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1008 #undef TARGET_ASM_FILE_START
1009 #define TARGET_ASM_FILE_START x86_file_start
1011 #undef TARGET_RTX_COSTS
1012 #define TARGET_RTX_COSTS ix86_rtx_costs
1013 #undef TARGET_ADDRESS_COST
1014 #define TARGET_ADDRESS_COST ix86_address_cost
1016 #undef TARGET_FIXED_CONDITION_CODE_REGS
1017 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1018 #undef TARGET_CC_MODES_COMPATIBLE
1019 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1021 #undef TARGET_MACHINE_DEPENDENT_REORG
1022 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1024 #undef TARGET_BUILD_BUILTIN_VA_LIST
1025 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1027 #undef TARGET_PROMOTE_PROTOTYPES
1028 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1030 #undef TARGET_STRUCT_VALUE_RTX
1031 #define TARGET_STRUCT_VALUE_RTX hook_rtx_tree_int_null
1033 struct gcc_target targetm = TARGET_INITIALIZER;
1035 /* The svr4 ABI for the i386 says that records and unions are returned
1037 #ifndef DEFAULT_PCC_STRUCT_RETURN
1038 #define DEFAULT_PCC_STRUCT_RETURN 1
1041 /* Sometimes certain combinations of command options do not make
1042 sense on a particular target machine. You can define a macro
1043 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1044 defined, is executed once just after all the command options have
1047 Don't use this macro to turn on various extra optimizations for
1048 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1051 override_options (void)
1054 /* Comes from final.c -- no real reason to change it. */
1055 #define MAX_CODE_ALIGN 16
1059 const struct processor_costs *cost; /* Processor costs */
1060 const int target_enable; /* Target flags to enable. */
1061 const int target_disable; /* Target flags to disable. */
1062 const int align_loop; /* Default alignments. */
1063 const int align_loop_max_skip;
1064 const int align_jump;
1065 const int align_jump_max_skip;
1066 const int align_func;
1068 const processor_target_table[PROCESSOR_max] =
1070 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1071 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1072 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1073 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1074 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1075 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1076 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1077 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1080 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1083 const char *const name; /* processor name or nickname. */
1084 const enum processor_type processor;
1085 const enum pta_flags
1090 PTA_PREFETCH_SSE = 8,
1096 const processor_alias_table[] =
1098 {"i386", PROCESSOR_I386, 0},
1099 {"i486", PROCESSOR_I486, 0},
1100 {"i586", PROCESSOR_PENTIUM, 0},
1101 {"pentium", PROCESSOR_PENTIUM, 0},
1102 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1103 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1104 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1105 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1106 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1107 {"i686", PROCESSOR_PENTIUMPRO, 0},
1108 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1109 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1110 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1111 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1112 PTA_MMX | PTA_PREFETCH_SSE},
1113 {"k6", PROCESSOR_K6, PTA_MMX},
1114 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1115 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1116 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1118 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1119 | PTA_3DNOW | PTA_3DNOW_A},
1120 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1121 | PTA_3DNOW_A | PTA_SSE},
1122 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1123 | PTA_3DNOW_A | PTA_SSE},
1124 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1125 | PTA_3DNOW_A | PTA_SSE},
1126 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1127 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1128 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1129 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1130 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1131 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1132 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1133 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1136 int const pta_size = ARRAY_SIZE (processor_alias_table);
1138 /* Set the default values for switches whose default depends on TARGET_64BIT
1139 in case they weren't overwritten by command line options. */
1142 if (flag_omit_frame_pointer == 2)
1143 flag_omit_frame_pointer = 1;
1144 if (flag_asynchronous_unwind_tables == 2)
1145 flag_asynchronous_unwind_tables = 1;
1146 if (flag_pcc_struct_return == 2)
1147 flag_pcc_struct_return = 0;
1151 if (flag_omit_frame_pointer == 2)
1152 flag_omit_frame_pointer = 0;
1153 if (flag_asynchronous_unwind_tables == 2)
1154 flag_asynchronous_unwind_tables = 0;
1155 if (flag_pcc_struct_return == 2)
1156 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1159 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1160 SUBTARGET_OVERRIDE_OPTIONS;
1163 if (!ix86_tune_string && ix86_arch_string)
1164 ix86_tune_string = ix86_arch_string;
1165 if (!ix86_tune_string)
1166 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1167 if (!ix86_arch_string)
1168 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1170 if (ix86_cmodel_string != 0)
1172 if (!strcmp (ix86_cmodel_string, "small"))
1173 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1175 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1176 else if (!strcmp (ix86_cmodel_string, "32"))
1177 ix86_cmodel = CM_32;
1178 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1179 ix86_cmodel = CM_KERNEL;
1180 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1181 ix86_cmodel = CM_MEDIUM;
1182 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1183 ix86_cmodel = CM_LARGE;
1185 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1189 ix86_cmodel = CM_32;
1191 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1193 if (ix86_asm_string != 0)
1195 if (!strcmp (ix86_asm_string, "intel"))
1196 ix86_asm_dialect = ASM_INTEL;
1197 else if (!strcmp (ix86_asm_string, "att"))
1198 ix86_asm_dialect = ASM_ATT;
1200 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1202 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1203 error ("code model `%s' not supported in the %s bit mode",
1204 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1205 if (ix86_cmodel == CM_LARGE)
1206 sorry ("code model `large' not supported yet");
1207 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1208 sorry ("%i-bit mode not compiled in",
1209 (target_flags & MASK_64BIT) ? 64 : 32);
1211 for (i = 0; i < pta_size; i++)
1212 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1214 ix86_arch = processor_alias_table[i].processor;
1215 /* Default cpu tuning to the architecture. */
1216 ix86_tune = ix86_arch;
1217 if (processor_alias_table[i].flags & PTA_MMX
1218 && !(target_flags_explicit & MASK_MMX))
1219 target_flags |= MASK_MMX;
1220 if (processor_alias_table[i].flags & PTA_3DNOW
1221 && !(target_flags_explicit & MASK_3DNOW))
1222 target_flags |= MASK_3DNOW;
1223 if (processor_alias_table[i].flags & PTA_3DNOW_A
1224 && !(target_flags_explicit & MASK_3DNOW_A))
1225 target_flags |= MASK_3DNOW_A;
1226 if (processor_alias_table[i].flags & PTA_SSE
1227 && !(target_flags_explicit & MASK_SSE))
1228 target_flags |= MASK_SSE;
1229 if (processor_alias_table[i].flags & PTA_SSE2
1230 && !(target_flags_explicit & MASK_SSE2))
1231 target_flags |= MASK_SSE2;
1232 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1233 x86_prefetch_sse = true;
1234 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1235 error ("CPU you selected does not support x86-64 instruction set");
1240 error ("bad value (%s) for -march= switch", ix86_arch_string);
1242 for (i = 0; i < pta_size; i++)
1243 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1245 ix86_tune = processor_alias_table[i].processor;
1246 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1247 error ("CPU you selected does not support x86-64 instruction set");
1250 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1251 x86_prefetch_sse = true;
1253 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1256 ix86_cost = &size_cost;
1258 ix86_cost = processor_target_table[ix86_tune].cost;
1259 target_flags |= processor_target_table[ix86_tune].target_enable;
1260 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1262 /* Arrange to set up i386_stack_locals for all functions. */
1263 init_machine_status = ix86_init_machine_status;
1265 /* Validate -mregparm= value. */
1266 if (ix86_regparm_string)
1268 i = atoi (ix86_regparm_string);
1269 if (i < 0 || i > REGPARM_MAX)
1270 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1276 ix86_regparm = REGPARM_MAX;
1278 /* If the user has provided any of the -malign-* options,
1279 warn and use that value only if -falign-* is not set.
1280 Remove this code in GCC 3.2 or later. */
1281 if (ix86_align_loops_string)
1283 warning ("-malign-loops is obsolete, use -falign-loops");
1284 if (align_loops == 0)
1286 i = atoi (ix86_align_loops_string);
1287 if (i < 0 || i > MAX_CODE_ALIGN)
1288 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1290 align_loops = 1 << i;
1294 if (ix86_align_jumps_string)
1296 warning ("-malign-jumps is obsolete, use -falign-jumps");
1297 if (align_jumps == 0)
1299 i = atoi (ix86_align_jumps_string);
1300 if (i < 0 || i > MAX_CODE_ALIGN)
1301 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1303 align_jumps = 1 << i;
1307 if (ix86_align_funcs_string)
1309 warning ("-malign-functions is obsolete, use -falign-functions");
1310 if (align_functions == 0)
1312 i = atoi (ix86_align_funcs_string);
1313 if (i < 0 || i > MAX_CODE_ALIGN)
1314 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1316 align_functions = 1 << i;
1320 /* Default align_* from the processor table. */
1321 if (align_loops == 0)
1323 align_loops = processor_target_table[ix86_tune].align_loop;
1324 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1326 if (align_jumps == 0)
1328 align_jumps = processor_target_table[ix86_tune].align_jump;
1329 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1331 if (align_functions == 0)
1333 align_functions = processor_target_table[ix86_tune].align_func;
1336 /* Validate -mpreferred-stack-boundary= value, or provide default.
1337 The default of 128 bits is for Pentium III's SSE __m128, but we
1338 don't want additional code to keep the stack aligned when
1339 optimizing for code size. */
1340 ix86_preferred_stack_boundary = (optimize_size
1341 ? TARGET_64BIT ? 128 : 32
1343 if (ix86_preferred_stack_boundary_string)
1345 i = atoi (ix86_preferred_stack_boundary_string);
1346 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1347 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1348 TARGET_64BIT ? 4 : 2);
1350 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1353 /* Validate -mbranch-cost= value, or provide default. */
1354 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1355 if (ix86_branch_cost_string)
1357 i = atoi (ix86_branch_cost_string);
1359 error ("-mbranch-cost=%d is not between 0 and 5", i);
1361 ix86_branch_cost = i;
1364 if (ix86_tls_dialect_string)
1366 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1367 ix86_tls_dialect = TLS_DIALECT_GNU;
1368 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1369 ix86_tls_dialect = TLS_DIALECT_SUN;
1371 error ("bad value (%s) for -mtls-dialect= switch",
1372 ix86_tls_dialect_string);
1375 /* Keep nonleaf frame pointers. */
1376 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1377 flag_omit_frame_pointer = 1;
1379 /* If we're doing fast math, we don't care about comparison order
1380 wrt NaNs. This lets us use a shorter comparison sequence. */
1381 if (flag_unsafe_math_optimizations)
1382 target_flags &= ~MASK_IEEE_FP;
1384 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1385 since the insns won't need emulation. */
1386 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1387 target_flags &= ~MASK_NO_FANCY_MATH_387;
1389 /* Turn on SSE2 builtins for -mpni. */
1391 target_flags |= MASK_SSE2;
1393 /* Turn on SSE builtins for -msse2. */
1395 target_flags |= MASK_SSE;
1399 if (TARGET_ALIGN_DOUBLE)
1400 error ("-malign-double makes no sense in the 64bit mode");
1402 error ("-mrtd calling convention not supported in the 64bit mode");
1403 /* Enable by default the SSE and MMX builtins. */
1404 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1405 ix86_fpmath = FPMATH_SSE;
1409 ix86_fpmath = FPMATH_387;
1410 /* i386 ABI does not specify red zone. It still makes sense to use it
1411 when programmer takes care to stack from being destroyed. */
1412 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1413 target_flags |= MASK_NO_RED_ZONE;
1416 if (ix86_fpmath_string != 0)
1418 if (! strcmp (ix86_fpmath_string, "387"))
1419 ix86_fpmath = FPMATH_387;
1420 else if (! strcmp (ix86_fpmath_string, "sse"))
1424 warning ("SSE instruction set disabled, using 387 arithmetics");
1425 ix86_fpmath = FPMATH_387;
1428 ix86_fpmath = FPMATH_SSE;
1430 else if (! strcmp (ix86_fpmath_string, "387,sse")
1431 || ! strcmp (ix86_fpmath_string, "sse,387"))
1435 warning ("SSE instruction set disabled, using 387 arithmetics");
1436 ix86_fpmath = FPMATH_387;
1438 else if (!TARGET_80387)
1440 warning ("387 instruction set disabled, using SSE arithmetics");
1441 ix86_fpmath = FPMATH_SSE;
1444 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1447 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1450 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1454 target_flags |= MASK_MMX;
1455 x86_prefetch_sse = true;
1458 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1461 target_flags |= MASK_MMX;
1462 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1463 extensions it adds. */
1464 if (x86_3dnow_a & (1 << ix86_arch))
1465 target_flags |= MASK_3DNOW_A;
1467 if ((x86_accumulate_outgoing_args & TUNEMASK)
1468 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1470 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1472 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1475 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1476 p = strchr (internal_label_prefix, 'X');
1477 internal_label_prefix_len = p - internal_label_prefix;
1483 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1485 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1486 make the problem with not enough registers even worse. */
1487 #ifdef INSN_SCHEDULING
1489 flag_schedule_insns = 0;
1492 /* The default values of these switches depend on the TARGET_64BIT
1493 that is not known at this moment. Mark these values with 2 and
1494 let user the to override these. In case there is no command line option
1495 specifying them, we will set the defaults in override_options. */
1497 flag_omit_frame_pointer = 2;
1498 flag_pcc_struct_return = 2;
1499 flag_asynchronous_unwind_tables = 2;
1502 /* Table of valid machine attributes. */
1503 const struct attribute_spec ix86_attribute_table[] =
1505 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1506 /* Stdcall attribute says callee is responsible for popping arguments
1507 if they are not variable. */
1508 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1509 /* Fastcall attribute says callee is responsible for popping arguments
1510 if they are not variable. */
1511 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1512 /* Cdecl attribute says the callee is a normal C declaration */
1513 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1514 /* Regparm attribute specifies how many integer arguments are to be
1515 passed in registers. */
1516 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1517 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1518 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1519 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1520 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1522 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1523 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1524 { NULL, 0, 0, false, false, false, NULL }
1527 /* Decide whether we can make a sibling call to a function. DECL is the
1528 declaration of the function being targeted by the call and EXP is the
1529 CALL_EXPR representing the call. */
1532 ix86_function_ok_for_sibcall (tree decl, tree exp)
1534 /* If we are generating position-independent code, we cannot sibcall
1535 optimize any indirect call, or a direct call to a global function,
1536 as the PLT requires %ebx be live. */
1537 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1540 /* If we are returning floats on the 80387 register stack, we cannot
1541 make a sibcall from a function that doesn't return a float to a
1542 function that does or, conversely, from a function that does return
1543 a float to a function that doesn't; the necessary stack adjustment
1544 would not be executed. */
1545 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1546 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1549 /* If this call is indirect, we'll need to be able to use a call-clobbered
1550 register for the address of the target function. Make sure that all
1551 such registers are not used for passing parameters. */
1552 if (!decl && !TARGET_64BIT)
1556 /* We're looking at the CALL_EXPR, we need the type of the function. */
1557 type = TREE_OPERAND (exp, 0); /* pointer expression */
1558 type = TREE_TYPE (type); /* pointer type */
1559 type = TREE_TYPE (type); /* function type */
1561 if (ix86_function_regparm (type, NULL) >= 3)
1563 /* ??? Need to count the actual number of registers to be used,
1564 not the possible number of registers. Fix later. */
1569 /* Otherwise okay. That also includes certain types of indirect calls. */
1573 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1574 arguments as in struct attribute_spec.handler. */
1576 ix86_handle_cdecl_attribute (tree *node, tree name,
1577 tree args ATTRIBUTE_UNUSED,
1578 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1580 if (TREE_CODE (*node) != FUNCTION_TYPE
1581 && TREE_CODE (*node) != METHOD_TYPE
1582 && TREE_CODE (*node) != FIELD_DECL
1583 && TREE_CODE (*node) != TYPE_DECL)
1585 warning ("`%s' attribute only applies to functions",
1586 IDENTIFIER_POINTER (name));
1587 *no_add_attrs = true;
1591 if (is_attribute_p ("fastcall", name))
1593 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1595 error ("fastcall and stdcall attributes are not compatible");
1597 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1599 error ("fastcall and regparm attributes are not compatible");
1602 else if (is_attribute_p ("stdcall", name))
1604 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1606 error ("fastcall and stdcall attributes are not compatible");
1613 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1614 *no_add_attrs = true;
1620 /* Handle a "regparm" attribute;
1621 arguments as in struct attribute_spec.handler. */
1623 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1624 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1626 if (TREE_CODE (*node) != FUNCTION_TYPE
1627 && TREE_CODE (*node) != METHOD_TYPE
1628 && TREE_CODE (*node) != FIELD_DECL
1629 && TREE_CODE (*node) != TYPE_DECL)
1631 warning ("`%s' attribute only applies to functions",
1632 IDENTIFIER_POINTER (name));
1633 *no_add_attrs = true;
1639 cst = TREE_VALUE (args);
1640 if (TREE_CODE (cst) != INTEGER_CST)
1642 warning ("`%s' attribute requires an integer constant argument",
1643 IDENTIFIER_POINTER (name));
1644 *no_add_attrs = true;
1646 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1648 warning ("argument to `%s' attribute larger than %d",
1649 IDENTIFIER_POINTER (name), REGPARM_MAX);
1650 *no_add_attrs = true;
1653 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1655 error ("fastcall and regparm attributes are not compatible");
1662 /* Return 0 if the attributes for two types are incompatible, 1 if they
1663 are compatible, and 2 if they are nearly compatible (which causes a
1664 warning to be generated). */
1667 ix86_comp_type_attributes (tree type1, tree type2)
1669 /* Check for mismatch of non-default calling convention. */
1670 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1672 if (TREE_CODE (type1) != FUNCTION_TYPE)
1675 /* Check for mismatched fastcall types */
1676 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1677 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1680 /* Check for mismatched return types (cdecl vs stdcall). */
1681 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1682 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1687 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1688 DECL may be NULL when calling function indirectly
1689 or considering a libcall. */
1692 ix86_function_regparm (tree type, tree decl)
1695 int regparm = ix86_regparm;
1696 bool user_convention = false;
1700 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1703 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1704 user_convention = true;
1707 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1710 user_convention = true;
1713 /* Use register calling convention for local functions when possible. */
1714 if (!TARGET_64BIT && !user_convention && decl
1715 && flag_unit_at_a_time && !profile_flag)
1717 struct cgraph_local_info *i = cgraph_local_info (decl);
1720 /* We can't use regparm(3) for nested functions as these use
1721 static chain pointer in third argument. */
1722 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1732 /* Return true if EAX is live at the start of the function. Used by
1733 ix86_expand_prologue to determine if we need special help before
1734 calling allocate_stack_worker. */
1737 ix86_eax_live_at_start_p (void)
1739 /* Cheat. Don't bother working forward from ix86_function_regparm
1740 to the function type to whether an actual argument is located in
1741 eax. Instead just look at cfg info, which is still close enough
1742 to correct at this point. This gives false positives for broken
1743 functions that might use uninitialized data that happens to be
1744 allocated in eax, but who cares? */
1745 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1748 /* Value is the number of bytes of arguments automatically
1749 popped when returning from a subroutine call.
1750 FUNDECL is the declaration node of the function (as a tree),
1751 FUNTYPE is the data type of the function (as a tree),
1752 or for a library call it is an identifier node for the subroutine name.
1753 SIZE is the number of bytes of arguments passed on the stack.
1755 On the 80386, the RTD insn may be used to pop them if the number
1756 of args is fixed, but if the number is variable then the caller
1757 must pop them all. RTD can't be used for library calls now
1758 because the library is compiled with the Unix compiler.
1759 Use of RTD is a selectable option, since it is incompatible with
1760 standard Unix calling sequences. If the option is not selected,
1761 the caller must always pop the args.
1763 The attribute stdcall is equivalent to RTD on a per module basis. */
1766 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1768 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1770 /* Cdecl functions override -mrtd, and never pop the stack. */
1771 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1773 /* Stdcall and fastcall functions will pop the stack if not
1775 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1776 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1780 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1781 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1782 == void_type_node)))
1786 /* Lose any fake structure return argument if it is passed on the stack. */
1787 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1790 int nregs = ix86_function_regparm (funtype, fundecl);
1793 return GET_MODE_SIZE (Pmode);
1799 /* Argument support functions. */
1801 /* Return true when register may be used to pass function parameters. */
1803 ix86_function_arg_regno_p (int regno)
1807 return (regno < REGPARM_MAX
1808 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1809 if (SSE_REGNO_P (regno) && TARGET_SSE)
1811 /* RAX is used as hidden argument to va_arg functions. */
1814 for (i = 0; i < REGPARM_MAX; i++)
1815 if (regno == x86_64_int_parameter_registers[i])
1820 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1821 for a call to a function whose data type is FNTYPE.
1822 For a library call, FNTYPE is 0. */
1825 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1826 tree fntype, /* tree ptr for function decl */
1827 rtx libname, /* SYMBOL_REF of library name or 0 */
1830 static CUMULATIVE_ARGS zero_cum;
1831 tree param, next_param;
1833 if (TARGET_DEBUG_ARG)
1835 fprintf (stderr, "\ninit_cumulative_args (");
1837 fprintf (stderr, "fntype code = %s, ret code = %s",
1838 tree_code_name[(int) TREE_CODE (fntype)],
1839 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1841 fprintf (stderr, "no fntype");
1844 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1849 /* Set up the number of registers to use for passing arguments. */
1851 cum->nregs = ix86_function_regparm (fntype, fndecl);
1853 cum->nregs = ix86_regparm;
1854 cum->sse_nregs = SSE_REGPARM_MAX;
1855 cum->mmx_nregs = MMX_REGPARM_MAX;
1856 cum->warn_sse = true;
1857 cum->warn_mmx = true;
1858 cum->maybe_vaarg = false;
1860 /* Use ecx and edx registers if function has fastcall attribute */
1861 if (fntype && !TARGET_64BIT)
1863 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1871 /* Determine if this function has variable arguments. This is
1872 indicated by the last argument being 'void_type_mode' if there
1873 are no variable arguments. If there are variable arguments, then
1874 we won't pass anything in registers */
1876 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1878 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1879 param != 0; param = next_param)
1881 next_param = TREE_CHAIN (param);
1882 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1893 cum->maybe_vaarg = true;
1897 if ((!fntype && !libname)
1898 || (fntype && !TYPE_ARG_TYPES (fntype)))
1899 cum->maybe_vaarg = 1;
1901 if (TARGET_DEBUG_ARG)
1902 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1907 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1908 of this code is to classify each 8bytes of incoming argument by the register
1909 class and assign registers accordingly. */
1911 /* Return the union class of CLASS1 and CLASS2.
1912 See the x86-64 PS ABI for details. */
1914 static enum x86_64_reg_class
1915 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1917 /* Rule #1: If both classes are equal, this is the resulting class. */
1918 if (class1 == class2)
1921 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1923 if (class1 == X86_64_NO_CLASS)
1925 if (class2 == X86_64_NO_CLASS)
1928 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1929 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1930 return X86_64_MEMORY_CLASS;
1932 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1933 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1934 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1935 return X86_64_INTEGERSI_CLASS;
1936 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1937 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1938 return X86_64_INTEGER_CLASS;
1940 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1941 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1942 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1943 return X86_64_MEMORY_CLASS;
1945 /* Rule #6: Otherwise class SSE is used. */
1946 return X86_64_SSE_CLASS;
1949 /* Classify the argument of type TYPE and mode MODE.
1950 CLASSES will be filled by the register class used to pass each word
1951 of the operand. The number of words is returned. In case the parameter
1952 should be passed in memory, 0 is returned. As a special case for zero
1953 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1955 BIT_OFFSET is used internally for handling records and specifies offset
1956 of the offset in bits modulo 256 to avoid overflow cases.
1958 See the x86-64 PS ABI for details.
1962 classify_argument (enum machine_mode mode, tree type,
1963 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1965 HOST_WIDE_INT bytes =
1966 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1967 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1969 /* Variable sized entities are always passed/returned in memory. */
1973 if (mode != VOIDmode
1974 && MUST_PASS_IN_STACK (mode, type))
1977 if (type && AGGREGATE_TYPE_P (type))
1981 enum x86_64_reg_class subclasses[MAX_CLASSES];
1983 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1987 for (i = 0; i < words; i++)
1988 classes[i] = X86_64_NO_CLASS;
1990 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1991 signalize memory class, so handle it as special case. */
1994 classes[0] = X86_64_NO_CLASS;
1998 /* Classify each field of record and merge classes. */
1999 if (TREE_CODE (type) == RECORD_TYPE)
2001 /* For classes first merge in the field of the subclasses. */
2002 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2004 tree bases = TYPE_BINFO_BASETYPES (type);
2005 int n_bases = TREE_VEC_LENGTH (bases);
2008 for (i = 0; i < n_bases; ++i)
2010 tree binfo = TREE_VEC_ELT (bases, i);
2012 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2013 tree type = BINFO_TYPE (binfo);
2015 num = classify_argument (TYPE_MODE (type),
2017 (offset + bit_offset) % 256);
2020 for (i = 0; i < num; i++)
2022 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2024 merge_classes (subclasses[i], classes[i + pos]);
2028 /* And now merge the fields of structure. */
2029 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2031 if (TREE_CODE (field) == FIELD_DECL)
2035 /* Bitfields are always classified as integer. Handle them
2036 early, since later code would consider them to be
2037 misaligned integers. */
2038 if (DECL_BIT_FIELD (field))
2040 for (i = int_bit_position (field) / 8 / 8;
2041 i < (int_bit_position (field)
2042 + tree_low_cst (DECL_SIZE (field), 0)
2045 merge_classes (X86_64_INTEGER_CLASS,
2050 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2051 TREE_TYPE (field), subclasses,
2052 (int_bit_position (field)
2053 + bit_offset) % 256);
2056 for (i = 0; i < num; i++)
2059 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2061 merge_classes (subclasses[i], classes[i + pos]);
2067 /* Arrays are handled as small records. */
2068 else if (TREE_CODE (type) == ARRAY_TYPE)
2071 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2072 TREE_TYPE (type), subclasses, bit_offset);
2076 /* The partial classes are now full classes. */
2077 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2078 subclasses[0] = X86_64_SSE_CLASS;
2079 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2080 subclasses[0] = X86_64_INTEGER_CLASS;
2082 for (i = 0; i < words; i++)
2083 classes[i] = subclasses[i % num];
2085 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2086 else if (TREE_CODE (type) == UNION_TYPE
2087 || TREE_CODE (type) == QUAL_UNION_TYPE)
2089 /* For classes first merge in the field of the subclasses. */
2090 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2092 tree bases = TYPE_BINFO_BASETYPES (type);
2093 int n_bases = TREE_VEC_LENGTH (bases);
2096 for (i = 0; i < n_bases; ++i)
2098 tree binfo = TREE_VEC_ELT (bases, i);
2100 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2101 tree type = BINFO_TYPE (binfo);
2103 num = classify_argument (TYPE_MODE (type),
2105 (offset + (bit_offset % 64)) % 256);
2108 for (i = 0; i < num; i++)
2110 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2112 merge_classes (subclasses[i], classes[i + pos]);
2116 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2118 if (TREE_CODE (field) == FIELD_DECL)
2121 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2122 TREE_TYPE (field), subclasses,
2126 for (i = 0; i < num; i++)
2127 classes[i] = merge_classes (subclasses[i], classes[i]);
2131 else if (TREE_CODE (type) == SET_TYPE)
2135 classes[0] = X86_64_INTEGERSI_CLASS;
2138 else if (bytes <= 8)
2140 classes[0] = X86_64_INTEGER_CLASS;
2143 else if (bytes <= 12)
2145 classes[0] = X86_64_INTEGER_CLASS;
2146 classes[1] = X86_64_INTEGERSI_CLASS;
2151 classes[0] = X86_64_INTEGER_CLASS;
2152 classes[1] = X86_64_INTEGER_CLASS;
2159 /* Final merger cleanup. */
2160 for (i = 0; i < words; i++)
2162 /* If one class is MEMORY, everything should be passed in
2164 if (classes[i] == X86_64_MEMORY_CLASS)
2167 /* The X86_64_SSEUP_CLASS should be always preceded by
2168 X86_64_SSE_CLASS. */
2169 if (classes[i] == X86_64_SSEUP_CLASS
2170 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2171 classes[i] = X86_64_SSE_CLASS;
2173 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2174 if (classes[i] == X86_64_X87UP_CLASS
2175 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2176 classes[i] = X86_64_SSE_CLASS;
2181 /* Compute alignment needed. We align all types to natural boundaries with
2182 exception of XFmode that is aligned to 64bits. */
2183 if (mode != VOIDmode && mode != BLKmode)
2185 int mode_alignment = GET_MODE_BITSIZE (mode);
2188 mode_alignment = 128;
2189 else if (mode == XCmode)
2190 mode_alignment = 256;
2191 /* Misaligned fields are always returned in memory. */
2192 if (bit_offset % mode_alignment)
2196 /* Classification of atomic types. */
2206 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2207 classes[0] = X86_64_INTEGERSI_CLASS;
2209 classes[0] = X86_64_INTEGER_CLASS;
2213 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2216 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2217 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2220 if (!(bit_offset % 64))
2221 classes[0] = X86_64_SSESF_CLASS;
2223 classes[0] = X86_64_SSE_CLASS;
2226 classes[0] = X86_64_SSEDF_CLASS;
2229 classes[0] = X86_64_X87_CLASS;
2230 classes[1] = X86_64_X87UP_CLASS;
2236 classes[0] = X86_64_X87_CLASS;
2237 classes[1] = X86_64_X87UP_CLASS;
2238 classes[2] = X86_64_X87_CLASS;
2239 classes[3] = X86_64_X87UP_CLASS;
2242 classes[0] = X86_64_SSEDF_CLASS;
2243 classes[1] = X86_64_SSEDF_CLASS;
2246 classes[0] = X86_64_SSE_CLASS;
2254 classes[0] = X86_64_SSE_CLASS;
2255 classes[1] = X86_64_SSEUP_CLASS;
2270 /* Examine the argument and return set number of register required in each
2271 class. Return 0 iff parameter should be passed in memory. */
2273 examine_argument (enum machine_mode mode, tree type, int in_return,
2274 int *int_nregs, int *sse_nregs)
2276 enum x86_64_reg_class class[MAX_CLASSES];
2277 int n = classify_argument (mode, type, class, 0);
2283 for (n--; n >= 0; n--)
2286 case X86_64_INTEGER_CLASS:
2287 case X86_64_INTEGERSI_CLASS:
2290 case X86_64_SSE_CLASS:
2291 case X86_64_SSESF_CLASS:
2292 case X86_64_SSEDF_CLASS:
2295 case X86_64_NO_CLASS:
2296 case X86_64_SSEUP_CLASS:
2298 case X86_64_X87_CLASS:
2299 case X86_64_X87UP_CLASS:
2303 case X86_64_MEMORY_CLASS:
2308 /* Construct container for the argument used by GCC interface. See
2309 FUNCTION_ARG for the detailed description. */
2311 construct_container (enum machine_mode mode, tree type, int in_return,
2312 int nintregs, int nsseregs, const int * intreg,
2315 enum machine_mode tmpmode;
2317 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2318 enum x86_64_reg_class class[MAX_CLASSES];
2322 int needed_sseregs, needed_intregs;
2323 rtx exp[MAX_CLASSES];
2326 n = classify_argument (mode, type, class, 0);
2327 if (TARGET_DEBUG_ARG)
2330 fprintf (stderr, "Memory class\n");
2333 fprintf (stderr, "Classes:");
2334 for (i = 0; i < n; i++)
2336 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2338 fprintf (stderr, "\n");
2343 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2345 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2348 /* First construct simple cases. Avoid SCmode, since we want to use
2349 single register to pass this type. */
2350 if (n == 1 && mode != SCmode)
2353 case X86_64_INTEGER_CLASS:
2354 case X86_64_INTEGERSI_CLASS:
2355 return gen_rtx_REG (mode, intreg[0]);
2356 case X86_64_SSE_CLASS:
2357 case X86_64_SSESF_CLASS:
2358 case X86_64_SSEDF_CLASS:
2359 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2360 case X86_64_X87_CLASS:
2361 return gen_rtx_REG (mode, FIRST_STACK_REG);
2362 case X86_64_NO_CLASS:
2363 /* Zero sized array, struct or class. */
2368 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2369 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2371 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2372 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2373 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2374 && class[1] == X86_64_INTEGER_CLASS
2375 && (mode == CDImode || mode == TImode || mode == TFmode)
2376 && intreg[0] + 1 == intreg[1])
2377 return gen_rtx_REG (mode, intreg[0]);
2379 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2380 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2381 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2383 /* Otherwise figure out the entries of the PARALLEL. */
2384 for (i = 0; i < n; i++)
2388 case X86_64_NO_CLASS:
2390 case X86_64_INTEGER_CLASS:
2391 case X86_64_INTEGERSI_CLASS:
2392 /* Merge TImodes on aligned occasions here too. */
2393 if (i * 8 + 8 > bytes)
2394 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2395 else if (class[i] == X86_64_INTEGERSI_CLASS)
2399 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2400 if (tmpmode == BLKmode)
2402 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2403 gen_rtx_REG (tmpmode, *intreg),
2407 case X86_64_SSESF_CLASS:
2408 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2409 gen_rtx_REG (SFmode,
2410 SSE_REGNO (sse_regno)),
2414 case X86_64_SSEDF_CLASS:
2415 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2416 gen_rtx_REG (DFmode,
2417 SSE_REGNO (sse_regno)),
2421 case X86_64_SSE_CLASS:
2422 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2426 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2427 gen_rtx_REG (tmpmode,
2428 SSE_REGNO (sse_regno)),
2430 if (tmpmode == TImode)
2438 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2439 for (i = 0; i < nexps; i++)
2440 XVECEXP (ret, 0, i) = exp [i];
2444 /* Update the data in CUM to advance over an argument
2445 of mode MODE and data type TYPE.
2446 (TYPE is null for libcalls where that information may not be available.) */
2449 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2450 enum machine_mode mode, /* current arg mode */
2451 tree type, /* type of the argument or 0 if lib support */
2452 int named) /* whether or not the argument was named */
2455 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2456 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2458 if (TARGET_DEBUG_ARG)
2460 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2461 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2464 int int_nregs, sse_nregs;
2465 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2466 cum->words += words;
2467 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2469 cum->nregs -= int_nregs;
2470 cum->sse_nregs -= sse_nregs;
2471 cum->regno += int_nregs;
2472 cum->sse_regno += sse_nregs;
2475 cum->words += words;
2479 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2480 && (!type || !AGGREGATE_TYPE_P (type)))
2482 cum->sse_words += words;
2483 cum->sse_nregs -= 1;
2484 cum->sse_regno += 1;
2485 if (cum->sse_nregs <= 0)
2491 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2492 && (!type || !AGGREGATE_TYPE_P (type)))
2494 cum->mmx_words += words;
2495 cum->mmx_nregs -= 1;
2496 cum->mmx_regno += 1;
2497 if (cum->mmx_nregs <= 0)
2505 cum->words += words;
2506 cum->nregs -= words;
2507 cum->regno += words;
2509 if (cum->nregs <= 0)
2519 /* Define where to put the arguments to a function.
2520 Value is zero to push the argument on the stack,
2521 or a hard register in which to store the argument.
2523 MODE is the argument's machine mode.
2524 TYPE is the data type of the argument (as a tree).
2525 This is null for libcalls where that information may
2527 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2528 the preceding args and about the function being called.
2529 NAMED is nonzero if this argument is a named parameter
2530 (otherwise it is an extra parameter matching an ellipsis). */
2533 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2534 enum machine_mode mode, /* current arg mode */
2535 tree type, /* type of the argument or 0 if lib support */
2536 int named) /* != 0 for normal args, == 0 for ... args */
2540 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2541 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2542 static bool warnedsse, warnedmmx;
2544 /* Handle a hidden AL argument containing number of registers for varargs
2545 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2547 if (mode == VOIDmode)
2550 return GEN_INT (cum->maybe_vaarg
2551 ? (cum->sse_nregs < 0
2559 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2560 &x86_64_int_parameter_registers [cum->regno],
2565 /* For now, pass fp/complex values on the stack. */
2577 if (words <= cum->nregs)
2579 int regno = cum->regno;
2581 /* Fastcall allocates the first two DWORD (SImode) or
2582 smaller arguments to ECX and EDX. */
2585 if (mode == BLKmode || mode == DImode)
2588 /* ECX not EAX is the first allocated register. */
2592 ret = gen_rtx_REG (mode, regno);
2602 if (!type || !AGGREGATE_TYPE_P (type))
2604 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2607 warning ("SSE vector argument without SSE enabled "
2611 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2618 if (!type || !AGGREGATE_TYPE_P (type))
2620 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2623 warning ("MMX vector argument without MMX enabled "
2627 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2632 if (TARGET_DEBUG_ARG)
2635 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2636 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2639 print_simple_rtl (stderr, ret);
2641 fprintf (stderr, ", stack");
2643 fprintf (stderr, " )\n");
2649 /* A C expression that indicates when an argument must be passed by
2650 reference. If nonzero for an argument, a copy of that argument is
2651 made in memory and a pointer to the argument is passed instead of
2652 the argument itself. The pointer is passed in whatever way is
2653 appropriate for passing a pointer to that type. */
2656 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2657 enum machine_mode mode ATTRIBUTE_UNUSED,
2658 tree type, int named ATTRIBUTE_UNUSED)
2663 if (type && int_size_in_bytes (type) == -1)
2665 if (TARGET_DEBUG_ARG)
2666 fprintf (stderr, "function_arg_pass_by_reference\n");
2673 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2676 contains_128bit_aligned_vector_p (tree type)
2678 enum machine_mode mode = TYPE_MODE (type);
2679 if (SSE_REG_MODE_P (mode)
2680 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2682 if (TYPE_ALIGN (type) < 128)
2685 if (AGGREGATE_TYPE_P (type))
2687 /* Walk the aggregates recursively. */
2688 if (TREE_CODE (type) == RECORD_TYPE
2689 || TREE_CODE (type) == UNION_TYPE
2690 || TREE_CODE (type) == QUAL_UNION_TYPE)
2694 if (TYPE_BINFO (type) != NULL
2695 && TYPE_BINFO_BASETYPES (type) != NULL)
2697 tree bases = TYPE_BINFO_BASETYPES (type);
2698 int n_bases = TREE_VEC_LENGTH (bases);
2701 for (i = 0; i < n_bases; ++i)
2703 tree binfo = TREE_VEC_ELT (bases, i);
2704 tree type = BINFO_TYPE (binfo);
2706 if (contains_128bit_aligned_vector_p (type))
2710 /* And now merge the fields of structure. */
2711 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2713 if (TREE_CODE (field) == FIELD_DECL
2714 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2718 /* Just for use if some languages passes arrays by value. */
2719 else if (TREE_CODE (type) == ARRAY_TYPE)
2721 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2730 /* Gives the alignment boundary, in bits, of an argument with the
2731 specified mode and type. */
2734 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2738 align = TYPE_ALIGN (type);
2740 align = GET_MODE_ALIGNMENT (mode);
2741 if (align < PARM_BOUNDARY)
2742 align = PARM_BOUNDARY;
2745 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2746 make an exception for SSE modes since these require 128bit
2749 The handling here differs from field_alignment. ICC aligns MMX
2750 arguments to 4 byte boundaries, while structure fields are aligned
2751 to 8 byte boundaries. */
2754 if (!SSE_REG_MODE_P (mode))
2755 align = PARM_BOUNDARY;
2759 if (!contains_128bit_aligned_vector_p (type))
2760 align = PARM_BOUNDARY;
2768 /* Return true if N is a possible register number of function value. */
2770 ix86_function_value_regno_p (int regno)
2774 return ((regno) == 0
2775 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2776 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2778 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2779 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2780 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2783 /* Define how to find the value returned by a function.
2784 VALTYPE is the data type of the value (as a tree).
2785 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2786 otherwise, FUNC is 0. */
2788 ix86_function_value (tree valtype)
2792 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2793 REGPARM_MAX, SSE_REGPARM_MAX,
2794 x86_64_int_return_registers, 0);
2795 /* For zero sized structures, construct_container return NULL, but we need
2796 to keep rest of compiler happy by returning meaningful value. */
2798 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2802 return gen_rtx_REG (TYPE_MODE (valtype),
2803 ix86_value_regno (TYPE_MODE (valtype)));
2806 /* Return false iff type is returned in memory. */
2808 ix86_return_in_memory (tree type)
2810 int needed_intregs, needed_sseregs, size;
2811 enum machine_mode mode = TYPE_MODE (type);
2814 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2816 if (mode == BLKmode)
2819 size = int_size_in_bytes (type);
2821 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2824 if (VECTOR_MODE_P (mode) || mode == TImode)
2826 /* User-created vectors small enough to fit in EAX. */
2830 /* MMX/3dNow values are returned on the stack, since we've
2831 got to EMMS/FEMMS before returning. */
2835 /* SSE values are returned in XMM0. */
2836 /* ??? Except when it doesn't exist? We have a choice of
2837 either (1) being abi incompatible with a -march switch,
2838 or (2) generating an error here. Given no good solution,
2839 I think the safest thing is one warning. The user won't
2840 be able to use -Werror, but.... */
2851 warning ("SSE vector return without SSE enabled "
2866 /* Define how to find the value returned by a library function
2867 assuming the value has mode MODE. */
2869 ix86_libcall_value (enum machine_mode mode)
2879 return gen_rtx_REG (mode, FIRST_SSE_REG);
2882 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2887 return gen_rtx_REG (mode, 0);
2891 return gen_rtx_REG (mode, ix86_value_regno (mode));
2894 /* Given a mode, return the register to use for a return value. */
2897 ix86_value_regno (enum machine_mode mode)
2899 /* Floating point return values in %st(0). */
2900 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2901 return FIRST_FLOAT_REG;
2902 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2903 we prevent this case when sse is not available. */
2904 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2905 return FIRST_SSE_REG;
2906 /* Everything else in %eax. */
2910 /* Create the va_list data type. */
2913 ix86_build_builtin_va_list (void)
2915 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2917 /* For i386 we use plain pointer to argument area. */
2919 return build_pointer_type (char_type_node);
2921 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2922 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2924 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2925 unsigned_type_node);
2926 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2927 unsigned_type_node);
2928 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2930 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2933 DECL_FIELD_CONTEXT (f_gpr) = record;
2934 DECL_FIELD_CONTEXT (f_fpr) = record;
2935 DECL_FIELD_CONTEXT (f_ovf) = record;
2936 DECL_FIELD_CONTEXT (f_sav) = record;
2938 TREE_CHAIN (record) = type_decl;
2939 TYPE_NAME (record) = type_decl;
2940 TYPE_FIELDS (record) = f_gpr;
2941 TREE_CHAIN (f_gpr) = f_fpr;
2942 TREE_CHAIN (f_fpr) = f_ovf;
2943 TREE_CHAIN (f_ovf) = f_sav;
2945 layout_type (record);
2947 /* The correct type is an array type of one element. */
2948 return build_array_type (record, build_index_type (size_zero_node));
2951 /* Perform any needed actions needed for a function that is receiving a
2952 variable number of arguments.
2956 MODE and TYPE are the mode and type of the current parameter.
2958 PRETEND_SIZE is a variable that should be set to the amount of stack
2959 that must be pushed by the prolog to pretend that our caller pushed
2962 Normally, this macro will push all remaining incoming registers on the
2963 stack and set PRETEND_SIZE to the length of the registers pushed. */
2966 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2967 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2970 CUMULATIVE_ARGS next_cum;
2971 rtx save_area = NULL_RTX, mem;
2984 /* Indicate to allocate space on the stack for varargs save area. */
2985 ix86_save_varrargs_registers = 1;
2987 cfun->stack_alignment_needed = 128;
2989 fntype = TREE_TYPE (current_function_decl);
2990 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2991 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2992 != void_type_node));
2994 /* For varargs, we do not want to skip the dummy va_dcl argument.
2995 For stdargs, we do want to skip the last named argument. */
2998 function_arg_advance (&next_cum, mode, type, 1);
3001 save_area = frame_pointer_rtx;
3003 set = get_varargs_alias_set ();
3005 for (i = next_cum.regno; i < ix86_regparm; i++)
3007 mem = gen_rtx_MEM (Pmode,
3008 plus_constant (save_area, i * UNITS_PER_WORD));
3009 set_mem_alias_set (mem, set);
3010 emit_move_insn (mem, gen_rtx_REG (Pmode,
3011 x86_64_int_parameter_registers[i]));
3014 if (next_cum.sse_nregs)
3016 /* Now emit code to save SSE registers. The AX parameter contains number
3017 of SSE parameter registers used to call this function. We use
3018 sse_prologue_save insn template that produces computed jump across
3019 SSE saves. We need some preparation work to get this working. */
3021 label = gen_label_rtx ();
3022 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3024 /* Compute address to jump to :
3025 label - 5*eax + nnamed_sse_arguments*5 */
3026 tmp_reg = gen_reg_rtx (Pmode);
3027 nsse_reg = gen_reg_rtx (Pmode);
3028 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3029 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3030 gen_rtx_MULT (Pmode, nsse_reg,
3032 if (next_cum.sse_regno)
3035 gen_rtx_CONST (DImode,
3036 gen_rtx_PLUS (DImode,
3038 GEN_INT (next_cum.sse_regno * 4))));
3040 emit_move_insn (nsse_reg, label_ref);
3041 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3043 /* Compute address of memory block we save into. We always use pointer
3044 pointing 127 bytes after first byte to store - this is needed to keep
3045 instruction size limited by 4 bytes. */
3046 tmp_reg = gen_reg_rtx (Pmode);
3047 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3048 plus_constant (save_area,
3049 8 * REGPARM_MAX + 127)));
3050 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3051 set_mem_alias_set (mem, set);
3052 set_mem_align (mem, BITS_PER_WORD);
3054 /* And finally do the dirty job! */
3055 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3056 GEN_INT (next_cum.sse_regno), label));
3061 /* Implement va_start. */
3064 ix86_va_start (tree valist, rtx nextarg)
3066 HOST_WIDE_INT words, n_gpr, n_fpr;
3067 tree f_gpr, f_fpr, f_ovf, f_sav;
3068 tree gpr, fpr, ovf, sav, t;
3070 /* Only 64bit target needs something special. */
3073 std_expand_builtin_va_start (valist, nextarg);
3077 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3078 f_fpr = TREE_CHAIN (f_gpr);
3079 f_ovf = TREE_CHAIN (f_fpr);
3080 f_sav = TREE_CHAIN (f_ovf);
3082 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3083 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3084 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3085 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3086 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3088 /* Count number of gp and fp argument registers used. */
3089 words = current_function_args_info.words;
3090 n_gpr = current_function_args_info.regno;
3091 n_fpr = current_function_args_info.sse_regno;
3093 if (TARGET_DEBUG_ARG)
3094 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3095 (int) words, (int) n_gpr, (int) n_fpr);
3097 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3098 build_int_2 (n_gpr * 8, 0));
3099 TREE_SIDE_EFFECTS (t) = 1;
3100 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3102 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3103 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3104 TREE_SIDE_EFFECTS (t) = 1;
3105 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3107 /* Find the overflow area. */
3108 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3110 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3111 build_int_2 (words * UNITS_PER_WORD, 0));
3112 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3113 TREE_SIDE_EFFECTS (t) = 1;
3114 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3116 /* Find the register save area.
3117 Prologue of the function save it right above stack frame. */
3118 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3119 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3120 TREE_SIDE_EFFECTS (t) = 1;
3121 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3124 /* Implement va_arg. */
3126 ix86_va_arg (tree valist, tree type)
3128 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3129 tree f_gpr, f_fpr, f_ovf, f_sav;
3130 tree gpr, fpr, ovf, sav, t;
3132 rtx lab_false, lab_over = NULL_RTX;
3137 /* Only 64bit target needs something special. */
3140 return std_expand_builtin_va_arg (valist, type);
3143 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3144 f_fpr = TREE_CHAIN (f_gpr);
3145 f_ovf = TREE_CHAIN (f_fpr);
3146 f_sav = TREE_CHAIN (f_ovf);
3148 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3149 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3150 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3151 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3152 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3154 size = int_size_in_bytes (type);
3157 /* Passed by reference. */
3159 type = build_pointer_type (type);
3160 size = int_size_in_bytes (type);
3162 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3164 container = construct_container (TYPE_MODE (type), type, 0,
3165 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3167 * Pull the value out of the saved registers ...
3170 addr_rtx = gen_reg_rtx (Pmode);
3174 rtx int_addr_rtx, sse_addr_rtx;
3175 int needed_intregs, needed_sseregs;
3178 lab_over = gen_label_rtx ();
3179 lab_false = gen_label_rtx ();
3181 examine_argument (TYPE_MODE (type), type, 0,
3182 &needed_intregs, &needed_sseregs);
3185 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3186 || TYPE_ALIGN (type) > 128);
3188 /* In case we are passing structure, verify that it is consecutive block
3189 on the register save area. If not we need to do moves. */
3190 if (!need_temp && !REG_P (container))
3192 /* Verify that all registers are strictly consecutive */
3193 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3197 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3199 rtx slot = XVECEXP (container, 0, i);
3200 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3201 || INTVAL (XEXP (slot, 1)) != i * 16)
3209 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3211 rtx slot = XVECEXP (container, 0, i);
3212 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3213 || INTVAL (XEXP (slot, 1)) != i * 8)
3220 int_addr_rtx = addr_rtx;
3221 sse_addr_rtx = addr_rtx;
3225 int_addr_rtx = gen_reg_rtx (Pmode);
3226 sse_addr_rtx = gen_reg_rtx (Pmode);
3228 /* First ensure that we fit completely in registers. */
3231 emit_cmp_and_jump_insns (expand_expr
3232 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3233 GEN_INT ((REGPARM_MAX - needed_intregs +
3234 1) * 8), GE, const1_rtx, SImode,
3239 emit_cmp_and_jump_insns (expand_expr
3240 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3241 GEN_INT ((SSE_REGPARM_MAX -
3242 needed_sseregs + 1) * 16 +
3243 REGPARM_MAX * 8), GE, const1_rtx,
3244 SImode, 1, lab_false);
3247 /* Compute index to start of area used for integer regs. */
3250 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3251 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3252 if (r != int_addr_rtx)
3253 emit_move_insn (int_addr_rtx, r);
3257 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3258 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3259 if (r != sse_addr_rtx)
3260 emit_move_insn (sse_addr_rtx, r);
3268 /* Never use the memory itself, as it has the alias set. */
3269 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3270 mem = gen_rtx_MEM (BLKmode, x);
3271 force_operand (x, addr_rtx);
3272 set_mem_alias_set (mem, get_varargs_alias_set ());
3273 set_mem_align (mem, BITS_PER_UNIT);
3275 for (i = 0; i < XVECLEN (container, 0); i++)
3277 rtx slot = XVECEXP (container, 0, i);
3278 rtx reg = XEXP (slot, 0);
3279 enum machine_mode mode = GET_MODE (reg);
3285 if (SSE_REGNO_P (REGNO (reg)))
3287 src_addr = sse_addr_rtx;
3288 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3292 src_addr = int_addr_rtx;
3293 src_offset = REGNO (reg) * 8;
3295 src_mem = gen_rtx_MEM (mode, src_addr);
3296 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3297 src_mem = adjust_address (src_mem, mode, src_offset);
3298 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3299 emit_move_insn (dest_mem, src_mem);
3306 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3307 build_int_2 (needed_intregs * 8, 0));
3308 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3309 TREE_SIDE_EFFECTS (t) = 1;
3310 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3315 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3316 build_int_2 (needed_sseregs * 16, 0));
3317 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3318 TREE_SIDE_EFFECTS (t) = 1;
3319 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3322 emit_jump_insn (gen_jump (lab_over));
3324 emit_label (lab_false);
3327 /* ... otherwise out of the overflow area. */
3329 /* Care for on-stack alignment if needed. */
3330 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3334 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3335 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3336 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3340 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3342 emit_move_insn (addr_rtx, r);
3345 build (PLUS_EXPR, TREE_TYPE (t), t,
3346 build_int_2 (rsize * UNITS_PER_WORD, 0));
3347 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3348 TREE_SIDE_EFFECTS (t) = 1;
3349 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3352 emit_label (lab_over);
3356 r = gen_rtx_MEM (Pmode, addr_rtx);
3357 set_mem_alias_set (r, get_varargs_alias_set ());
3358 emit_move_insn (addr_rtx, r);
3364 /* Return nonzero if OP is either a i387 or SSE fp register. */
3366 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3368 return ANY_FP_REG_P (op);
3371 /* Return nonzero if OP is an i387 fp register. */
3373 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3375 return FP_REG_P (op);
3378 /* Return nonzero if OP is a non-fp register_operand. */
3380 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3382 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3385 /* Return nonzero if OP is a register operand other than an
3386 i387 fp register. */
3388 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3390 return register_operand (op, mode) && !FP_REG_P (op);
3393 /* Return nonzero if OP is general operand representable on x86_64. */
3396 x86_64_general_operand (rtx op, enum machine_mode mode)
3399 return general_operand (op, mode);
3400 if (nonimmediate_operand (op, mode))
3402 return x86_64_sign_extended_value (op);
3405 /* Return nonzero if OP is general operand representable on x86_64
3406 as either sign extended or zero extended constant. */
3409 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3412 return general_operand (op, mode);
3413 if (nonimmediate_operand (op, mode))
3415 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3418 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3421 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3424 return nonmemory_operand (op, mode);
3425 if (register_operand (op, mode))
3427 return x86_64_sign_extended_value (op);
3430 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3433 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3435 if (!TARGET_64BIT || !flag_pic)
3436 return nonmemory_operand (op, mode);
3437 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3439 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3444 /* Return nonzero if OPNUM's MEM should be matched
3445 in movabs* patterns. */
3448 ix86_check_movabs (rtx insn, int opnum)
3452 set = PATTERN (insn);
3453 if (GET_CODE (set) == PARALLEL)
3454 set = XVECEXP (set, 0, 0);
3455 if (GET_CODE (set) != SET)
3457 mem = XEXP (set, opnum);
3458 while (GET_CODE (mem) == SUBREG)
3459 mem = SUBREG_REG (mem);
3460 if (GET_CODE (mem) != MEM)
3462 return (volatile_ok || !MEM_VOLATILE_P (mem));
3465 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3468 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3471 return nonmemory_operand (op, mode);
3472 if (register_operand (op, mode))
3474 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3477 /* Return nonzero if OP is immediate operand representable on x86_64. */
3480 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3483 return immediate_operand (op, mode);
3484 return x86_64_sign_extended_value (op);
3487 /* Return nonzero if OP is immediate operand representable on x86_64. */
3490 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3492 return x86_64_zero_extended_value (op);
3495 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3496 for shift & compare patterns, as shifting by 0 does not change flags),
3497 else return zero. */
3500 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3502 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3505 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3506 reference and a constant. */
3509 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3511 switch (GET_CODE (op))
3519 if (GET_CODE (op) == SYMBOL_REF
3520 || GET_CODE (op) == LABEL_REF
3521 || (GET_CODE (op) == UNSPEC
3522 && (XINT (op, 1) == UNSPEC_GOT
3523 || XINT (op, 1) == UNSPEC_GOTOFF
3524 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3526 if (GET_CODE (op) != PLUS
3527 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3531 if (GET_CODE (op) == SYMBOL_REF
3532 || GET_CODE (op) == LABEL_REF)
3534 /* Only @GOTOFF gets offsets. */
3535 if (GET_CODE (op) != UNSPEC
3536 || XINT (op, 1) != UNSPEC_GOTOFF)
3539 op = XVECEXP (op, 0, 0);
3540 if (GET_CODE (op) == SYMBOL_REF
3541 || GET_CODE (op) == LABEL_REF)
3550 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3553 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3555 if (GET_CODE (op) != CONST)
3560 if (GET_CODE (op) == UNSPEC
3561 && XINT (op, 1) == UNSPEC_GOTPCREL)
3563 if (GET_CODE (op) == PLUS
3564 && GET_CODE (XEXP (op, 0)) == UNSPEC
3565 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3570 if (GET_CODE (op) == UNSPEC)
3572 if (GET_CODE (op) != PLUS
3573 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3576 if (GET_CODE (op) == UNSPEC)
3582 /* Return true if OP is a symbolic operand that resolves locally. */
3585 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3587 if (GET_CODE (op) == CONST
3588 && GET_CODE (XEXP (op, 0)) == PLUS
3589 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3590 op = XEXP (XEXP (op, 0), 0);
3592 if (GET_CODE (op) == LABEL_REF)
3595 if (GET_CODE (op) != SYMBOL_REF)
3598 if (SYMBOL_REF_LOCAL_P (op))
3601 /* There is, however, a not insubstantial body of code in the rest of
3602 the compiler that assumes it can just stick the results of
3603 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3604 /* ??? This is a hack. Should update the body of the compiler to
3605 always create a DECL an invoke targetm.encode_section_info. */
3606 if (strncmp (XSTR (op, 0), internal_label_prefix,
3607 internal_label_prefix_len) == 0)
3613 /* Test for various thread-local symbols. */
3616 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3618 if (GET_CODE (op) != SYMBOL_REF)
3620 return SYMBOL_REF_TLS_MODEL (op);
3624 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3626 if (GET_CODE (op) != SYMBOL_REF)
3628 return SYMBOL_REF_TLS_MODEL (op) == kind;
3632 global_dynamic_symbolic_operand (rtx op,
3633 enum machine_mode mode ATTRIBUTE_UNUSED)
3635 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3639 local_dynamic_symbolic_operand (rtx op,
3640 enum machine_mode mode ATTRIBUTE_UNUSED)
3642 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3646 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3648 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3652 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3654 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3657 /* Test for a valid operand for a call instruction. Don't allow the
3658 arg pointer register or virtual regs since they may decay into
3659 reg + const, which the patterns can't handle. */
3662 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3664 /* Disallow indirect through a virtual register. This leads to
3665 compiler aborts when trying to eliminate them. */
3666 if (GET_CODE (op) == REG
3667 && (op == arg_pointer_rtx
3668 || op == frame_pointer_rtx
3669 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3670 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3673 /* Disallow `call 1234'. Due to varying assembler lameness this
3674 gets either rejected or translated to `call .+1234'. */
3675 if (GET_CODE (op) == CONST_INT)
3678 /* Explicitly allow SYMBOL_REF even if pic. */
3679 if (GET_CODE (op) == SYMBOL_REF)
3682 /* Otherwise we can allow any general_operand in the address. */
3683 return general_operand (op, Pmode);
3686 /* Test for a valid operand for a call instruction. Don't allow the
3687 arg pointer register or virtual regs since they may decay into
3688 reg + const, which the patterns can't handle. */
3691 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3693 /* Disallow indirect through a virtual register. This leads to
3694 compiler aborts when trying to eliminate them. */
3695 if (GET_CODE (op) == REG
3696 && (op == arg_pointer_rtx
3697 || op == frame_pointer_rtx
3698 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3699 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3702 /* Explicitly allow SYMBOL_REF even if pic. */
3703 if (GET_CODE (op) == SYMBOL_REF)
3706 /* Otherwise we can only allow register operands. */
3707 return register_operand (op, Pmode);
3711 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3713 if (GET_CODE (op) == CONST
3714 && GET_CODE (XEXP (op, 0)) == PLUS
3715 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3716 op = XEXP (XEXP (op, 0), 0);
3717 return GET_CODE (op) == SYMBOL_REF;
3720 /* Match exactly zero and one. */
3723 const0_operand (rtx op, enum machine_mode mode)
3725 return op == CONST0_RTX (mode);
3729 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3731 return op == const1_rtx;
3734 /* Match 2, 4, or 8. Used for leal multiplicands. */
3737 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3739 return (GET_CODE (op) == CONST_INT
3740 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3744 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3746 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3750 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3752 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3756 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3758 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3762 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3764 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3768 /* True if this is a constant appropriate for an increment or decrement. */
3771 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3773 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3774 registers, since carry flag is not set. */
3775 if (TARGET_PENTIUM4 && !optimize_size)
3777 return op == const1_rtx || op == constm1_rtx;
3780 /* Return nonzero if OP is acceptable as operand of DImode shift
3784 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3787 return nonimmediate_operand (op, mode);
3789 return register_operand (op, mode);
3792 /* Return false if this is the stack pointer, or any other fake
3793 register eliminable to the stack pointer. Otherwise, this is
3796 This is used to prevent esp from being used as an index reg.
3797 Which would only happen in pathological cases. */
3800 reg_no_sp_operand (rtx op, enum machine_mode mode)
3803 if (GET_CODE (t) == SUBREG)
3805 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3808 return register_operand (op, mode);
3812 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3814 return MMX_REG_P (op);
3817 /* Return false if this is any eliminable register. Otherwise
3821 general_no_elim_operand (rtx op, enum machine_mode mode)
3824 if (GET_CODE (t) == SUBREG)
3826 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3827 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3828 || t == virtual_stack_dynamic_rtx)
3831 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3832 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3835 return general_operand (op, mode);
3838 /* Return false if this is any eliminable register. Otherwise
3839 register_operand or const_int. */
3842 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3845 if (GET_CODE (t) == SUBREG)
3847 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3848 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3849 || t == virtual_stack_dynamic_rtx)
3852 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3855 /* Return false if this is any eliminable register or stack register,
3856 otherwise work like register_operand. */
3859 index_register_operand (rtx op, enum machine_mode mode)
3862 if (GET_CODE (t) == SUBREG)
3866 if (t == arg_pointer_rtx
3867 || t == frame_pointer_rtx
3868 || t == virtual_incoming_args_rtx
3869 || t == virtual_stack_vars_rtx
3870 || t == virtual_stack_dynamic_rtx
3871 || REGNO (t) == STACK_POINTER_REGNUM)
3874 return general_operand (op, mode);
3877 /* Return true if op is a Q_REGS class register. */
3880 q_regs_operand (rtx op, enum machine_mode mode)
3882 if (mode != VOIDmode && GET_MODE (op) != mode)
3884 if (GET_CODE (op) == SUBREG)
3885 op = SUBREG_REG (op);
3886 return ANY_QI_REG_P (op);
3889 /* Return true if op is an flags register. */
3892 flags_reg_operand (rtx op, enum machine_mode mode)
3894 if (mode != VOIDmode && GET_MODE (op) != mode)
3896 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3899 /* Return true if op is a NON_Q_REGS class register. */
3902 non_q_regs_operand (rtx op, enum machine_mode mode)
3904 if (mode != VOIDmode && GET_MODE (op) != mode)
3906 if (GET_CODE (op) == SUBREG)
3907 op = SUBREG_REG (op);
3908 return NON_QI_REG_P (op);
3912 zero_extended_scalar_load_operand (rtx op,
3913 enum machine_mode mode ATTRIBUTE_UNUSED)
3916 if (GET_CODE (op) != MEM)
3918 op = maybe_get_pool_constant (op);
3921 if (GET_CODE (op) != CONST_VECTOR)
3924 (GET_MODE_SIZE (GET_MODE (op)) /
3925 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3926 for (n_elts--; n_elts > 0; n_elts--)
3928 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3929 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3935 /* Return 1 when OP is operand acceptable for standard SSE move. */
3937 vector_move_operand (rtx op, enum machine_mode mode)
3939 if (nonimmediate_operand (op, mode))
3941 if (GET_MODE (op) != mode && mode != VOIDmode)
3943 return (op == CONST0_RTX (GET_MODE (op)));
3946 /* Return true if op if a valid address, and does not contain
3947 a segment override. */
3950 no_seg_address_operand (rtx op, enum machine_mode mode)
3952 struct ix86_address parts;
3954 if (! address_operand (op, mode))
3957 if (! ix86_decompose_address (op, &parts))
3960 return parts.seg == SEG_DEFAULT;
3963 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3966 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3968 enum rtx_code code = GET_CODE (op);
3971 /* Operations supported directly. */
3981 /* These are equivalent to ones above in non-IEEE comparisons. */
3988 return !TARGET_IEEE_FP;
3993 /* Return 1 if OP is a valid comparison operator in valid mode. */
3995 ix86_comparison_operator (rtx op, enum machine_mode mode)
3997 enum machine_mode inmode;
3998 enum rtx_code code = GET_CODE (op);
3999 if (mode != VOIDmode && GET_MODE (op) != mode)
4001 if (GET_RTX_CLASS (code) != '<')
4003 inmode = GET_MODE (XEXP (op, 0));
4005 if (inmode == CCFPmode || inmode == CCFPUmode)
4007 enum rtx_code second_code, bypass_code;
4008 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4009 return (bypass_code == NIL && second_code == NIL);
4016 if (inmode == CCmode || inmode == CCGCmode
4017 || inmode == CCGOCmode || inmode == CCNOmode)
4020 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4021 if (inmode == CCmode)
4025 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4033 /* Return 1 if OP is a valid comparison operator testing carry flag
4036 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4038 enum machine_mode inmode;
4039 enum rtx_code code = GET_CODE (op);
4041 if (mode != VOIDmode && GET_MODE (op) != mode)
4043 if (GET_RTX_CLASS (code) != '<')
4045 inmode = GET_MODE (XEXP (op, 0));
4046 if (GET_CODE (XEXP (op, 0)) != REG
4047 || REGNO (XEXP (op, 0)) != 17
4048 || XEXP (op, 1) != const0_rtx)
4051 if (inmode == CCFPmode || inmode == CCFPUmode)
4053 enum rtx_code second_code, bypass_code;
4055 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4056 if (bypass_code != NIL || second_code != NIL)
4058 code = ix86_fp_compare_code_to_integer (code);
4060 else if (inmode != CCmode)
4065 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4068 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4070 enum machine_mode inmode;
4071 enum rtx_code code = GET_CODE (op);
4073 if (mode != VOIDmode && GET_MODE (op) != mode)
4075 if (GET_RTX_CLASS (code) != '<')
4077 inmode = GET_MODE (XEXP (op, 0));
4078 if (inmode == CCFPmode || inmode == CCFPUmode)
4080 enum rtx_code second_code, bypass_code;
4082 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4083 if (bypass_code != NIL || second_code != NIL)
4085 code = ix86_fp_compare_code_to_integer (code);
4087 /* i387 supports just limited amount of conditional codes. */
4090 case LTU: case GTU: case LEU: case GEU:
4091 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4094 case ORDERED: case UNORDERED:
4102 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4105 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4107 switch (GET_CODE (op))
4110 /* Modern CPUs have same latency for HImode and SImode multiply,
4111 but 386 and 486 do HImode multiply faster. */
4112 return ix86_tune > PROCESSOR_I486;
4124 /* Nearly general operand, but accept any const_double, since we wish
4125 to be able to drop them into memory rather than have them get pulled
4129 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4131 if (mode != VOIDmode && mode != GET_MODE (op))
4133 if (GET_CODE (op) == CONST_DOUBLE)
4135 return general_operand (op, mode);
4138 /* Match an SI or HImode register for a zero_extract. */
4141 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4144 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4145 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4148 if (!register_operand (op, VOIDmode))
4151 /* Be careful to accept only registers having upper parts. */
4152 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4153 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4156 /* Return 1 if this is a valid binary floating-point operation.
4157 OP is the expression matched, and MODE is its mode. */
4160 binary_fp_operator (rtx op, enum machine_mode mode)
4162 if (mode != VOIDmode && mode != GET_MODE (op))
4165 switch (GET_CODE (op))
4171 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4179 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4181 return GET_CODE (op) == MULT;
4185 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4187 return GET_CODE (op) == DIV;
4191 arith_or_logical_operator (rtx op, enum machine_mode mode)
4193 return ((mode == VOIDmode || GET_MODE (op) == mode)
4194 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4195 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4198 /* Returns 1 if OP is memory operand with a displacement. */
4201 memory_displacement_operand (rtx op, enum machine_mode mode)
4203 struct ix86_address parts;
4205 if (! memory_operand (op, mode))
4208 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4211 return parts.disp != NULL_RTX;
4214 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4215 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4217 ??? It seems likely that this will only work because cmpsi is an
4218 expander, and no actual insns use this. */
4221 cmpsi_operand (rtx op, enum machine_mode mode)
4223 if (nonimmediate_operand (op, mode))
4226 if (GET_CODE (op) == AND
4227 && GET_MODE (op) == SImode
4228 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4229 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4230 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4231 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4232 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4233 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4239 /* Returns 1 if OP is memory operand that can not be represented by the
4243 long_memory_operand (rtx op, enum machine_mode mode)
4245 if (! memory_operand (op, mode))
4248 return memory_address_length (op) != 0;
4251 /* Return nonzero if the rtx is known aligned. */
4254 aligned_operand (rtx op, enum machine_mode mode)
4256 struct ix86_address parts;
4258 if (!general_operand (op, mode))
4261 /* Registers and immediate operands are always "aligned". */
4262 if (GET_CODE (op) != MEM)
4265 /* Don't even try to do any aligned optimizations with volatiles. */
4266 if (MEM_VOLATILE_P (op))
4271 /* Pushes and pops are only valid on the stack pointer. */
4272 if (GET_CODE (op) == PRE_DEC
4273 || GET_CODE (op) == POST_INC)
4276 /* Decode the address. */
4277 if (! ix86_decompose_address (op, &parts))
4280 /* Look for some component that isn't known to be aligned. */
4284 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4289 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4294 if (GET_CODE (parts.disp) != CONST_INT
4295 || (INTVAL (parts.disp) & 3) != 0)
4299 /* Didn't find one -- this must be an aligned address. */
4303 /* Initialize the table of extra 80387 mathematical constants. */
4306 init_ext_80387_constants (void)
4308 static const char * cst[5] =
4310 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4311 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4312 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4313 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4314 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4318 for (i = 0; i < 5; i++)
4320 real_from_string (&ext_80387_constants_table[i], cst[i]);
4321 /* Ensure each constant is rounded to XFmode precision. */
4322 real_convert (&ext_80387_constants_table[i],
4323 XFmode, &ext_80387_constants_table[i]);
4326 ext_80387_constants_init = 1;
4329 /* Return true if the constant is something that can be loaded with
4330 a special instruction. */
4333 standard_80387_constant_p (rtx x)
4335 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4338 if (x == CONST0_RTX (GET_MODE (x)))
4340 if (x == CONST1_RTX (GET_MODE (x)))
4343 /* For XFmode constants, try to find a special 80387 instruction on
4344 those CPUs that benefit from them. */
4345 if (GET_MODE (x) == XFmode
4346 && x86_ext_80387_constants & TUNEMASK)
4351 if (! ext_80387_constants_init)
4352 init_ext_80387_constants ();
4354 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4355 for (i = 0; i < 5; i++)
4356 if (real_identical (&r, &ext_80387_constants_table[i]))
4363 /* Return the opcode of the special instruction to be used to load
4367 standard_80387_constant_opcode (rtx x)
4369 switch (standard_80387_constant_p (x))
4389 /* Return the CONST_DOUBLE representing the 80387 constant that is
4390 loaded by the specified special instruction. The argument IDX
4391 matches the return value from standard_80387_constant_p. */
4394 standard_80387_constant_rtx (int idx)
4398 if (! ext_80387_constants_init)
4399 init_ext_80387_constants ();
4415 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4419 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4422 standard_sse_constant_p (rtx x)
4424 if (x == const0_rtx)
4426 return (x == CONST0_RTX (GET_MODE (x)));
4429 /* Returns 1 if OP contains a symbol reference */
4432 symbolic_reference_mentioned_p (rtx op)
4437 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4440 fmt = GET_RTX_FORMAT (GET_CODE (op));
4441 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4447 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4448 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4452 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4459 /* Return 1 if it is appropriate to emit `ret' instructions in the
4460 body of a function. Do this only if the epilogue is simple, needing a
4461 couple of insns. Prior to reloading, we can't tell how many registers
4462 must be saved, so return 0 then. Return 0 if there is no frame
4463 marker to de-allocate.
4465 If NON_SAVING_SETJMP is defined and true, then it is not possible
4466 for the epilogue to be simple, so return 0. This is a special case
4467 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4468 until final, but jump_optimize may need to know sooner if a
4472 ix86_can_use_return_insn_p (void)
4474 struct ix86_frame frame;
4476 #ifdef NON_SAVING_SETJMP
4477 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4481 if (! reload_completed || frame_pointer_needed)
4484 /* Don't allow more than 32 pop, since that's all we can do
4485 with one instruction. */
4486 if (current_function_pops_args
4487 && current_function_args_size >= 32768)
4490 ix86_compute_frame_layout (&frame);
4491 return frame.to_allocate == 0 && frame.nregs == 0;
4494 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4496 x86_64_sign_extended_value (rtx value)
4498 switch (GET_CODE (value))
4500 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4501 to be at least 32 and this all acceptable constants are
4502 represented as CONST_INT. */
4504 if (HOST_BITS_PER_WIDE_INT == 32)
4508 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4509 return trunc_int_for_mode (val, SImode) == val;
4513 /* For certain code models, the symbolic references are known to fit.
4514 in CM_SMALL_PIC model we know it fits if it is local to the shared
4515 library. Don't count TLS SYMBOL_REFs here, since they should fit
4516 only if inside of UNSPEC handled below. */
4518 /* TLS symbols are not constant. */
4519 if (tls_symbolic_operand (value, Pmode))
4521 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4523 /* For certain code models, the code is near as well. */
4525 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4526 || ix86_cmodel == CM_KERNEL);
4528 /* We also may accept the offsetted memory references in certain special
4531 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4532 switch (XINT (XEXP (value, 0), 1))
4534 case UNSPEC_GOTPCREL:
4536 case UNSPEC_GOTNTPOFF:
4542 if (GET_CODE (XEXP (value, 0)) == PLUS)
4544 rtx op1 = XEXP (XEXP (value, 0), 0);
4545 rtx op2 = XEXP (XEXP (value, 0), 1);
4546 HOST_WIDE_INT offset;
4548 if (ix86_cmodel == CM_LARGE)
4550 if (GET_CODE (op2) != CONST_INT)
4552 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4553 switch (GET_CODE (op1))
4556 /* For CM_SMALL assume that latest object is 16MB before
4557 end of 31bits boundary. We may also accept pretty
4558 large negative constants knowing that all objects are
4559 in the positive half of address space. */
4560 if (ix86_cmodel == CM_SMALL
4561 && offset < 16*1024*1024
4562 && trunc_int_for_mode (offset, SImode) == offset)
4564 /* For CM_KERNEL we know that all object resist in the
4565 negative half of 32bits address space. We may not
4566 accept negative offsets, since they may be just off
4567 and we may accept pretty large positive ones. */
4568 if (ix86_cmodel == CM_KERNEL
4570 && trunc_int_for_mode (offset, SImode) == offset)
4574 /* These conditions are similar to SYMBOL_REF ones, just the
4575 constraints for code models differ. */
4576 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4577 && offset < 16*1024*1024
4578 && trunc_int_for_mode (offset, SImode) == offset)
4580 if (ix86_cmodel == CM_KERNEL
4582 && trunc_int_for_mode (offset, SImode) == offset)
4586 switch (XINT (op1, 1))
4591 && trunc_int_for_mode (offset, SImode) == offset)
4605 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4607 x86_64_zero_extended_value (rtx value)
4609 switch (GET_CODE (value))
4612 if (HOST_BITS_PER_WIDE_INT == 32)
4613 return (GET_MODE (value) == VOIDmode
4614 && !CONST_DOUBLE_HIGH (value));
4618 if (HOST_BITS_PER_WIDE_INT == 32)
4619 return INTVAL (value) >= 0;
4621 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4624 /* For certain code models, the symbolic references are known to fit. */
4626 /* TLS symbols are not constant. */
4627 if (tls_symbolic_operand (value, Pmode))
4629 return ix86_cmodel == CM_SMALL;
4631 /* For certain code models, the code is near as well. */
4633 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4635 /* We also may accept the offsetted memory references in certain special
4638 if (GET_CODE (XEXP (value, 0)) == PLUS)
4640 rtx op1 = XEXP (XEXP (value, 0), 0);
4641 rtx op2 = XEXP (XEXP (value, 0), 1);
4643 if (ix86_cmodel == CM_LARGE)
4645 switch (GET_CODE (op1))
4649 /* For small code model we may accept pretty large positive
4650 offsets, since one bit is available for free. Negative
4651 offsets are limited by the size of NULL pointer area
4652 specified by the ABI. */
4653 if (ix86_cmodel == CM_SMALL
4654 && GET_CODE (op2) == CONST_INT
4655 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4656 && (trunc_int_for_mode (INTVAL (op2), SImode)
4659 /* ??? For the kernel, we may accept adjustment of
4660 -0x10000000, since we know that it will just convert
4661 negative address space to positive, but perhaps this
4662 is not worthwhile. */
4665 /* These conditions are similar to SYMBOL_REF ones, just the
4666 constraints for code models differ. */
4667 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4668 && GET_CODE (op2) == CONST_INT
4669 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4670 && (trunc_int_for_mode (INTVAL (op2), SImode)
4684 /* Value should be nonzero if functions must have frame pointers.
4685 Zero means the frame pointer need not be set up (and parms may
4686 be accessed via the stack pointer) in functions that seem suitable. */
4689 ix86_frame_pointer_required (void)
4691 /* If we accessed previous frames, then the generated code expects
4692 to be able to access the saved ebp value in our frame. */
4693 if (cfun->machine->accesses_prev_frame)
4696 /* Several x86 os'es need a frame pointer for other reasons,
4697 usually pertaining to setjmp. */
4698 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4701 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4702 the frame pointer by default. Turn it back on now if we've not
4703 got a leaf function. */
4704 if (TARGET_OMIT_LEAF_FRAME_POINTER
4705 && (!current_function_is_leaf))
4708 if (current_function_profile)
4714 /* Record that the current function accesses previous call frames. */
4717 ix86_setup_frame_addresses (void)
4719 cfun->machine->accesses_prev_frame = 1;
4722 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4723 # define USE_HIDDEN_LINKONCE 1
4725 # define USE_HIDDEN_LINKONCE 0
4728 static int pic_labels_used;
4730 /* Fills in the label name that should be used for a pc thunk for
4731 the given register. */
4734 get_pc_thunk_name (char name[32], unsigned int regno)
4736 if (USE_HIDDEN_LINKONCE)
4737 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4739 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4743 /* This function generates code for -fpic that loads %ebx with
4744 the return address of the caller and then returns. */
4747 ix86_file_end (void)
4752 for (regno = 0; regno < 8; ++regno)
4756 if (! ((pic_labels_used >> regno) & 1))
4759 get_pc_thunk_name (name, regno);
4761 if (USE_HIDDEN_LINKONCE)
4765 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4767 TREE_PUBLIC (decl) = 1;
4768 TREE_STATIC (decl) = 1;
4769 DECL_ONE_ONLY (decl) = 1;
4771 (*targetm.asm_out.unique_section) (decl, 0);
4772 named_section (decl, NULL, 0);
4774 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4775 fputs ("\t.hidden\t", asm_out_file);
4776 assemble_name (asm_out_file, name);
4777 fputc ('\n', asm_out_file);
4778 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4783 ASM_OUTPUT_LABEL (asm_out_file, name);
4786 xops[0] = gen_rtx_REG (SImode, regno);
4787 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4788 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4789 output_asm_insn ("ret", xops);
4792 if (NEED_INDICATE_EXEC_STACK)
4793 file_end_indicate_exec_stack ();
4796 /* Emit code for the SET_GOT patterns. */
4799 output_set_got (rtx dest)
4804 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4806 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4808 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4811 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4813 output_asm_insn ("call\t%a2", xops);
4816 /* Output the "canonical" label name ("Lxx$pb") here too. This
4817 is what will be referred to by the Mach-O PIC subsystem. */
4818 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4820 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4821 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4824 output_asm_insn ("pop{l}\t%0", xops);
4829 get_pc_thunk_name (name, REGNO (dest));
4830 pic_labels_used |= 1 << REGNO (dest);
4832 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4833 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4834 output_asm_insn ("call\t%X2", xops);
4837 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4838 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4839 else if (!TARGET_MACHO)
4840 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4845 /* Generate an "push" pattern for input ARG. */
4850 return gen_rtx_SET (VOIDmode,
4852 gen_rtx_PRE_DEC (Pmode,
4853 stack_pointer_rtx)),
4857 /* Return >= 0 if there is an unused call-clobbered register available
4858 for the entire function. */
4861 ix86_select_alt_pic_regnum (void)
4863 if (current_function_is_leaf && !current_function_profile)
4866 for (i = 2; i >= 0; --i)
4867 if (!regs_ever_live[i])
4871 return INVALID_REGNUM;
4874 /* Return 1 if we need to save REGNO. */
4876 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4878 if (pic_offset_table_rtx
4879 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4880 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4881 || current_function_profile
4882 || current_function_calls_eh_return
4883 || current_function_uses_const_pool))
4885 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4890 if (current_function_calls_eh_return && maybe_eh_return)
4895 unsigned test = EH_RETURN_DATA_REGNO (i);
4896 if (test == INVALID_REGNUM)
4903 return (regs_ever_live[regno]
4904 && !call_used_regs[regno]
4905 && !fixed_regs[regno]
4906 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4909 /* Return number of registers to be saved on the stack. */
4912 ix86_nsaved_regs (void)
4917 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4918 if (ix86_save_reg (regno, true))
4923 /* Return the offset between two registers, one to be eliminated, and the other
4924 its replacement, at the start of a routine. */
4927 ix86_initial_elimination_offset (int from, int to)
4929 struct ix86_frame frame;
4930 ix86_compute_frame_layout (&frame);
4932 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4933 return frame.hard_frame_pointer_offset;
4934 else if (from == FRAME_POINTER_REGNUM
4935 && to == HARD_FRAME_POINTER_REGNUM)
4936 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4939 if (to != STACK_POINTER_REGNUM)
4941 else if (from == ARG_POINTER_REGNUM)
4942 return frame.stack_pointer_offset;
4943 else if (from != FRAME_POINTER_REGNUM)
4946 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4950 /* Fill structure ix86_frame about frame of currently computed function. */
4953 ix86_compute_frame_layout (struct ix86_frame *frame)
4955 HOST_WIDE_INT total_size;
4956 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4957 HOST_WIDE_INT offset;
4958 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4959 HOST_WIDE_INT size = get_frame_size ();
4961 frame->nregs = ix86_nsaved_regs ();
4964 /* During reload iteration the amount of registers saved can change.
4965 Recompute the value as needed. Do not recompute when amount of registers
4966 didn't change as reload does mutiple calls to the function and does not
4967 expect the decision to change within single iteration. */
4969 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4971 int count = frame->nregs;
4973 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4974 /* The fast prologue uses move instead of push to save registers. This
4975 is significantly longer, but also executes faster as modern hardware
4976 can execute the moves in parallel, but can't do that for push/pop.
4978 Be careful about choosing what prologue to emit: When function takes
4979 many instructions to execute we may use slow version as well as in
4980 case function is known to be outside hot spot (this is known with
4981 feedback only). Weight the size of function by number of registers
4982 to save as it is cheap to use one or two push instructions but very
4983 slow to use many of them. */
4985 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4986 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4987 || (flag_branch_probabilities
4988 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4989 cfun->machine->use_fast_prologue_epilogue = false;
4991 cfun->machine->use_fast_prologue_epilogue
4992 = !expensive_function_p (count);
4994 if (TARGET_PROLOGUE_USING_MOVE
4995 && cfun->machine->use_fast_prologue_epilogue)
4996 frame->save_regs_using_mov = true;
4998 frame->save_regs_using_mov = false;
5001 /* Skip return address and saved base pointer. */
5002 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5004 frame->hard_frame_pointer_offset = offset;
5006 /* Do some sanity checking of stack_alignment_needed and
5007 preferred_alignment, since i386 port is the only using those features
5008 that may break easily. */
5010 if (size && !stack_alignment_needed)
5012 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5014 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5016 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5019 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5020 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5022 /* Register save area */
5023 offset += frame->nregs * UNITS_PER_WORD;
5026 if (ix86_save_varrargs_registers)
5028 offset += X86_64_VARARGS_SIZE;
5029 frame->va_arg_size = X86_64_VARARGS_SIZE;
5032 frame->va_arg_size = 0;
5034 /* Align start of frame for local function. */
5035 frame->padding1 = ((offset + stack_alignment_needed - 1)
5036 & -stack_alignment_needed) - offset;
5038 offset += frame->padding1;
5040 /* Frame pointer points here. */
5041 frame->frame_pointer_offset = offset;
5045 /* Add outgoing arguments area. Can be skipped if we eliminated
5046 all the function calls as dead code.
5047 Skipping is however impossible when function calls alloca. Alloca
5048 expander assumes that last current_function_outgoing_args_size
5049 of stack frame are unused. */
5050 if (ACCUMULATE_OUTGOING_ARGS
5051 && (!current_function_is_leaf || current_function_calls_alloca))
5053 offset += current_function_outgoing_args_size;
5054 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5057 frame->outgoing_arguments_size = 0;
5059 /* Align stack boundary. Only needed if we're calling another function
5061 if (!current_function_is_leaf || current_function_calls_alloca)
5062 frame->padding2 = ((offset + preferred_alignment - 1)
5063 & -preferred_alignment) - offset;
5065 frame->padding2 = 0;
5067 offset += frame->padding2;
5069 /* We've reached end of stack frame. */
5070 frame->stack_pointer_offset = offset;
5072 /* Size prologue needs to allocate. */
5073 frame->to_allocate =
5074 (size + frame->padding1 + frame->padding2
5075 + frame->outgoing_arguments_size + frame->va_arg_size);
5077 if ((!frame->to_allocate && frame->nregs <= 1)
5078 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5079 frame->save_regs_using_mov = false;
5081 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5082 && current_function_is_leaf)
5084 frame->red_zone_size = frame->to_allocate;
5085 if (frame->save_regs_using_mov)
5086 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5087 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5088 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5091 frame->red_zone_size = 0;
5092 frame->to_allocate -= frame->red_zone_size;
5093 frame->stack_pointer_offset -= frame->red_zone_size;
5095 fprintf (stderr, "nregs: %i\n", frame->nregs);
5096 fprintf (stderr, "size: %i\n", size);
5097 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5098 fprintf (stderr, "padding1: %i\n", frame->padding1);
5099 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5100 fprintf (stderr, "padding2: %i\n", frame->padding2);
5101 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5102 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5103 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5104 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5105 frame->hard_frame_pointer_offset);
5106 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5110 /* Emit code to save registers in the prologue. */
5113 ix86_emit_save_regs (void)
5118 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5119 if (ix86_save_reg (regno, true))
5121 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5122 RTX_FRAME_RELATED_P (insn) = 1;
5126 /* Emit code to save registers using MOV insns. First register
5127 is restored from POINTER + OFFSET. */
5129 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5134 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5135 if (ix86_save_reg (regno, true))
5137 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5139 gen_rtx_REG (Pmode, regno));
5140 RTX_FRAME_RELATED_P (insn) = 1;
5141 offset += UNITS_PER_WORD;
5145 /* Expand prologue or epilogue stack adjustment.
5146 The pattern exist to put a dependency on all ebp-based memory accesses.
5147 STYLE should be negative if instructions should be marked as frame related,
5148 zero if %r11 register is live and cannot be freely used and positive
5152 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5157 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5158 else if (x86_64_immediate_operand (offset, DImode))
5159 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5163 /* r11 is used by indirect sibcall return as well, set before the
5164 epilogue and used after the epilogue. ATM indirect sibcall
5165 shouldn't be used together with huge frame sizes in one
5166 function because of the frame_size check in sibcall.c. */
5169 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5170 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5172 RTX_FRAME_RELATED_P (insn) = 1;
5173 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5177 RTX_FRAME_RELATED_P (insn) = 1;
5180 /* Expand the prologue into a bunch of separate insns. */
5183 ix86_expand_prologue (void)
5187 struct ix86_frame frame;
5188 HOST_WIDE_INT allocate;
5190 ix86_compute_frame_layout (&frame);
5192 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5193 slower on all targets. Also sdb doesn't like it. */
5195 if (frame_pointer_needed)
5197 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5198 RTX_FRAME_RELATED_P (insn) = 1;
5200 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5201 RTX_FRAME_RELATED_P (insn) = 1;
5204 allocate = frame.to_allocate;
5206 if (!frame.save_regs_using_mov)
5207 ix86_emit_save_regs ();
5209 allocate += frame.nregs * UNITS_PER_WORD;
5211 /* When using red zone we may start register saving before allocating
5212 the stack frame saving one cycle of the prologue. */
5213 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5214 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5215 : stack_pointer_rtx,
5216 -frame.nregs * UNITS_PER_WORD);
5220 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5221 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5222 GEN_INT (-allocate), -1);
5225 /* Only valid for Win32. */
5226 rtx eax = gen_rtx_REG (SImode, 0);
5227 bool eax_live = ix86_eax_live_at_start_p ();
5234 emit_insn (gen_push (eax));
5238 insn = emit_move_insn (eax, GEN_INT (allocate));
5239 RTX_FRAME_RELATED_P (insn) = 1;
5241 insn = emit_insn (gen_allocate_stack_worker (eax));
5242 RTX_FRAME_RELATED_P (insn) = 1;
5246 rtx t = plus_constant (stack_pointer_rtx, allocate);
5247 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5251 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5253 if (!frame_pointer_needed || !frame.to_allocate)
5254 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5256 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5257 -frame.nregs * UNITS_PER_WORD);
5260 pic_reg_used = false;
5261 if (pic_offset_table_rtx
5262 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5263 || current_function_profile))
5265 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5267 if (alt_pic_reg_used != INVALID_REGNUM)
5268 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5270 pic_reg_used = true;
5275 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5277 /* Even with accurate pre-reload life analysis, we can wind up
5278 deleting all references to the pic register after reload.
5279 Consider if cross-jumping unifies two sides of a branch
5280 controlled by a comparison vs the only read from a global.
5281 In which case, allow the set_got to be deleted, though we're
5282 too late to do anything about the ebx save in the prologue. */
5283 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5286 /* Prevent function calls from be scheduled before the call to mcount.
5287 In the pic_reg_used case, make sure that the got load isn't deleted. */
5288 if (current_function_profile)
5289 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5292 /* Emit code to restore saved registers using MOV insns. First register
5293 is restored from POINTER + OFFSET. */
5295 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5296 int maybe_eh_return)
5299 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5301 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5302 if (ix86_save_reg (regno, maybe_eh_return))
5304 /* Ensure that adjust_address won't be forced to produce pointer
5305 out of range allowed by x86-64 instruction set. */
5306 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5310 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5311 emit_move_insn (r11, GEN_INT (offset));
5312 emit_insn (gen_adddi3 (r11, r11, pointer));
5313 base_address = gen_rtx_MEM (Pmode, r11);
5316 emit_move_insn (gen_rtx_REG (Pmode, regno),
5317 adjust_address (base_address, Pmode, offset));
5318 offset += UNITS_PER_WORD;
5322 /* Restore function stack, frame, and registers. */
5325 ix86_expand_epilogue (int style)
5328 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5329 struct ix86_frame frame;
5330 HOST_WIDE_INT offset;
5332 ix86_compute_frame_layout (&frame);
5334 /* Calculate start of saved registers relative to ebp. Special care
5335 must be taken for the normal return case of a function using
5336 eh_return: the eax and edx registers are marked as saved, but not
5337 restored along this path. */
5338 offset = frame.nregs;
5339 if (current_function_calls_eh_return && style != 2)
5341 offset *= -UNITS_PER_WORD;
5343 /* If we're only restoring one register and sp is not valid then
5344 using a move instruction to restore the register since it's
5345 less work than reloading sp and popping the register.
5347 The default code result in stack adjustment using add/lea instruction,
5348 while this code results in LEAVE instruction (or discrete equivalent),
5349 so it is profitable in some other cases as well. Especially when there
5350 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5351 and there is exactly one register to pop. This heuristic may need some
5352 tuning in future. */
5353 if ((!sp_valid && frame.nregs <= 1)
5354 || (TARGET_EPILOGUE_USING_MOVE
5355 && cfun->machine->use_fast_prologue_epilogue
5356 && (frame.nregs > 1 || frame.to_allocate))
5357 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5358 || (frame_pointer_needed && TARGET_USE_LEAVE
5359 && cfun->machine->use_fast_prologue_epilogue
5360 && frame.nregs == 1)
5361 || current_function_calls_eh_return)
5363 /* Restore registers. We can use ebp or esp to address the memory
5364 locations. If both are available, default to ebp, since offsets
5365 are known to be small. Only exception is esp pointing directly to the
5366 end of block of saved registers, where we may simplify addressing
5369 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5370 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5371 frame.to_allocate, style == 2);
5373 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5374 offset, style == 2);
5376 /* eh_return epilogues need %ecx added to the stack pointer. */
5379 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5381 if (frame_pointer_needed)
5383 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5384 tmp = plus_constant (tmp, UNITS_PER_WORD);
5385 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5387 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5388 emit_move_insn (hard_frame_pointer_rtx, tmp);
5390 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5395 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5396 tmp = plus_constant (tmp, (frame.to_allocate
5397 + frame.nregs * UNITS_PER_WORD));
5398 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5401 else if (!frame_pointer_needed)
5402 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5403 GEN_INT (frame.to_allocate
5404 + frame.nregs * UNITS_PER_WORD),
5406 /* If not an i386, mov & pop is faster than "leave". */
5407 else if (TARGET_USE_LEAVE || optimize_size
5408 || !cfun->machine->use_fast_prologue_epilogue)
5409 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5412 pro_epilogue_adjust_stack (stack_pointer_rtx,
5413 hard_frame_pointer_rtx,
5416 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5418 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5423 /* First step is to deallocate the stack frame so that we can
5424 pop the registers. */
5427 if (!frame_pointer_needed)
5429 pro_epilogue_adjust_stack (stack_pointer_rtx,
5430 hard_frame_pointer_rtx,
5431 GEN_INT (offset), style);
5433 else if (frame.to_allocate)
5434 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5435 GEN_INT (frame.to_allocate), style);
5437 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5438 if (ix86_save_reg (regno, false))
5441 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5443 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5445 if (frame_pointer_needed)
5447 /* Leave results in shorter dependency chains on CPUs that are
5448 able to grok it fast. */
5449 if (TARGET_USE_LEAVE)
5450 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5451 else if (TARGET_64BIT)
5452 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5454 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5458 /* Sibcall epilogues don't want a return instruction. */
5462 if (current_function_pops_args && current_function_args_size)
5464 rtx popc = GEN_INT (current_function_pops_args);
5466 /* i386 can only pop 64K bytes. If asked to pop more, pop
5467 return address, do explicit add, and jump indirectly to the
5470 if (current_function_pops_args >= 65536)
5472 rtx ecx = gen_rtx_REG (SImode, 2);
5474 /* There is no "pascal" calling convention in 64bit ABI. */
5478 emit_insn (gen_popsi1 (ecx));
5479 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5480 emit_jump_insn (gen_return_indirect_internal (ecx));
5483 emit_jump_insn (gen_return_pop_internal (popc));
5486 emit_jump_insn (gen_return_internal ());
5489 /* Reset from the function's potential modifications. */
5492 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5493 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5495 if (pic_offset_table_rtx)
5496 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5499 /* Extract the parts of an RTL expression that is a valid memory address
5500 for an instruction. Return 0 if the structure of the address is
5501 grossly off. Return -1 if the address contains ASHIFT, so it is not
5502 strictly valid, but still used for computing length of lea instruction. */
5505 ix86_decompose_address (rtx addr, struct ix86_address *out)
5507 rtx base = NULL_RTX;
5508 rtx index = NULL_RTX;
5509 rtx disp = NULL_RTX;
5510 HOST_WIDE_INT scale = 1;
5511 rtx scale_rtx = NULL_RTX;
5513 enum ix86_address_seg seg = SEG_DEFAULT;
5515 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5517 else if (GET_CODE (addr) == PLUS)
5527 addends[n++] = XEXP (op, 1);
5530 while (GET_CODE (op) == PLUS);
5535 for (i = n; i >= 0; --i)
5538 switch (GET_CODE (op))
5543 index = XEXP (op, 0);
5544 scale_rtx = XEXP (op, 1);
5548 if (XINT (op, 1) == UNSPEC_TP
5549 && TARGET_TLS_DIRECT_SEG_REFS
5550 && seg == SEG_DEFAULT)
5551 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5580 else if (GET_CODE (addr) == MULT)
5582 index = XEXP (addr, 0); /* index*scale */
5583 scale_rtx = XEXP (addr, 1);
5585 else if (GET_CODE (addr) == ASHIFT)
5589 /* We're called for lea too, which implements ashift on occasion. */
5590 index = XEXP (addr, 0);
5591 tmp = XEXP (addr, 1);
5592 if (GET_CODE (tmp) != CONST_INT)
5594 scale = INTVAL (tmp);
5595 if ((unsigned HOST_WIDE_INT) scale > 3)
5601 disp = addr; /* displacement */
5603 /* Extract the integral value of scale. */
5606 if (GET_CODE (scale_rtx) != CONST_INT)
5608 scale = INTVAL (scale_rtx);
5611 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5612 if (base && index && scale == 1
5613 && (index == arg_pointer_rtx
5614 || index == frame_pointer_rtx
5615 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5622 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5623 if ((base == hard_frame_pointer_rtx
5624 || base == frame_pointer_rtx
5625 || base == arg_pointer_rtx) && !disp)
5628 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5629 Avoid this by transforming to [%esi+0]. */
5630 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5631 && base && !index && !disp
5633 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5636 /* Special case: encode reg+reg instead of reg*2. */
5637 if (!base && index && scale && scale == 2)
5638 base = index, scale = 1;
5640 /* Special case: scaling cannot be encoded without base or displacement. */
5641 if (!base && !disp && index && scale != 1)
5653 /* Return cost of the memory address x.
5654 For i386, it is better to use a complex address than let gcc copy
5655 the address into a reg and make a new pseudo. But not if the address
5656 requires to two regs - that would mean more pseudos with longer
5659 ix86_address_cost (rtx x)
5661 struct ix86_address parts;
5664 if (!ix86_decompose_address (x, &parts))
5667 /* More complex memory references are better. */
5668 if (parts.disp && parts.disp != const0_rtx)
5670 if (parts.seg != SEG_DEFAULT)
5673 /* Attempt to minimize number of registers in the address. */
5675 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5677 && (!REG_P (parts.index)
5678 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5682 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5684 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5685 && parts.base != parts.index)
5688 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5689 since it's predecode logic can't detect the length of instructions
5690 and it degenerates to vector decoded. Increase cost of such
5691 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5692 to split such addresses or even refuse such addresses at all.
5694 Following addressing modes are affected:
5699 The first and last case may be avoidable by explicitly coding the zero in
5700 memory address, but I don't have AMD-K6 machine handy to check this
5704 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5705 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5706 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5712 /* If X is a machine specific address (i.e. a symbol or label being
5713 referenced as a displacement from the GOT implemented using an
5714 UNSPEC), then return the base term. Otherwise return X. */
5717 ix86_find_base_term (rtx x)
5723 if (GET_CODE (x) != CONST)
5726 if (GET_CODE (term) == PLUS
5727 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5728 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5729 term = XEXP (term, 0);
5730 if (GET_CODE (term) != UNSPEC
5731 || XINT (term, 1) != UNSPEC_GOTPCREL)
5734 term = XVECEXP (term, 0, 0);
5736 if (GET_CODE (term) != SYMBOL_REF
5737 && GET_CODE (term) != LABEL_REF)
5743 term = ix86_delegitimize_address (x);
5745 if (GET_CODE (term) != SYMBOL_REF
5746 && GET_CODE (term) != LABEL_REF)
5752 /* Determine if a given RTX is a valid constant. We already know this
5753 satisfies CONSTANT_P. */
5756 legitimate_constant_p (rtx x)
5760 switch (GET_CODE (x))
5763 /* TLS symbols are not constant. */
5764 if (tls_symbolic_operand (x, Pmode))
5769 inner = XEXP (x, 0);
5771 /* Offsets of TLS symbols are never valid.
5772 Discourage CSE from creating them. */
5773 if (GET_CODE (inner) == PLUS
5774 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5777 if (GET_CODE (inner) == PLUS)
5779 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5781 inner = XEXP (inner, 0);
5784 /* Only some unspecs are valid as "constants". */
5785 if (GET_CODE (inner) == UNSPEC)
5786 switch (XINT (inner, 1))
5790 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5792 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5802 /* Otherwise we handle everything else in the move patterns. */
5806 /* Determine if it's legal to put X into the constant pool. This
5807 is not possible for the address of thread-local symbols, which
5808 is checked above. */
5811 ix86_cannot_force_const_mem (rtx x)
5813 return !legitimate_constant_p (x);
5816 /* Determine if a given RTX is a valid constant address. */
5819 constant_address_p (rtx x)
5821 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5824 /* Nonzero if the constant value X is a legitimate general operand
5825 when generating PIC code. It is given that flag_pic is on and
5826 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5829 legitimate_pic_operand_p (rtx x)
5833 switch (GET_CODE (x))
5836 inner = XEXP (x, 0);
5838 /* Only some unspecs are valid as "constants". */
5839 if (GET_CODE (inner) == UNSPEC)
5840 switch (XINT (inner, 1))
5843 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5851 return legitimate_pic_address_disp_p (x);
5858 /* Determine if a given CONST RTX is a valid memory displacement
5862 legitimate_pic_address_disp_p (rtx disp)
5866 /* In 64bit mode we can allow direct addresses of symbols and labels
5867 when they are not dynamic symbols. */
5870 /* TLS references should always be enclosed in UNSPEC. */
5871 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5873 if (GET_CODE (disp) == SYMBOL_REF
5874 && ix86_cmodel == CM_SMALL_PIC
5875 && SYMBOL_REF_LOCAL_P (disp))
5877 if (GET_CODE (disp) == LABEL_REF)
5879 if (GET_CODE (disp) == CONST
5880 && GET_CODE (XEXP (disp, 0)) == PLUS)
5882 rtx op0 = XEXP (XEXP (disp, 0), 0);
5883 rtx op1 = XEXP (XEXP (disp, 0), 1);
5885 /* TLS references should always be enclosed in UNSPEC. */
5886 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5888 if (((GET_CODE (op0) == SYMBOL_REF
5889 && ix86_cmodel == CM_SMALL_PIC
5890 && SYMBOL_REF_LOCAL_P (op0))
5891 || GET_CODE (op0) == LABEL_REF)
5892 && GET_CODE (op1) == CONST_INT
5893 && INTVAL (op1) < 16*1024*1024
5894 && INTVAL (op1) >= -16*1024*1024)
5898 if (GET_CODE (disp) != CONST)
5900 disp = XEXP (disp, 0);
5904 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5905 of GOT tables. We should not need these anyway. */
5906 if (GET_CODE (disp) != UNSPEC
5907 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5910 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5911 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5917 if (GET_CODE (disp) == PLUS)
5919 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5921 disp = XEXP (disp, 0);
5925 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5926 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5928 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5929 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5930 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5932 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5933 if (! strcmp (sym_name, "<pic base>"))
5938 if (GET_CODE (disp) != UNSPEC)
5941 switch (XINT (disp, 1))
5946 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5948 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5949 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5950 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5952 case UNSPEC_GOTTPOFF:
5953 case UNSPEC_GOTNTPOFF:
5954 case UNSPEC_INDNTPOFF:
5957 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5959 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5961 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5967 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5968 memory address for an instruction. The MODE argument is the machine mode
5969 for the MEM expression that wants to use this address.
5971 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5972 convert common non-canonical forms to canonical form so that they will
5976 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5978 struct ix86_address parts;
5979 rtx base, index, disp;
5980 HOST_WIDE_INT scale;
5981 const char *reason = NULL;
5982 rtx reason_rtx = NULL_RTX;
5984 if (TARGET_DEBUG_ADDR)
5987 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5988 GET_MODE_NAME (mode), strict);
5992 if (ix86_decompose_address (addr, &parts) <= 0)
5994 reason = "decomposition failed";
5999 index = parts.index;
6001 scale = parts.scale;
6003 /* Validate base register.
6005 Don't allow SUBREG's here, it can lead to spill failures when the base
6006 is one word out of a two word structure, which is represented internally
6013 if (GET_CODE (base) != REG)
6015 reason = "base is not a register";
6019 if (GET_MODE (base) != Pmode)
6021 reason = "base is not in Pmode";
6025 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6026 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6028 reason = "base is not valid";
6033 /* Validate index register.
6035 Don't allow SUBREG's here, it can lead to spill failures when the index
6036 is one word out of a two word structure, which is represented internally
6043 if (GET_CODE (index) != REG)
6045 reason = "index is not a register";
6049 if (GET_MODE (index) != Pmode)
6051 reason = "index is not in Pmode";
6055 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6056 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6058 reason = "index is not valid";
6063 /* Validate scale factor. */
6066 reason_rtx = GEN_INT (scale);
6069 reason = "scale without index";
6073 if (scale != 2 && scale != 4 && scale != 8)
6075 reason = "scale is not a valid multiplier";
6080 /* Validate displacement. */
6085 if (GET_CODE (disp) == CONST
6086 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6087 switch (XINT (XEXP (disp, 0), 1))
6091 case UNSPEC_GOTPCREL:
6094 goto is_legitimate_pic;
6096 case UNSPEC_GOTTPOFF:
6097 case UNSPEC_GOTNTPOFF:
6098 case UNSPEC_INDNTPOFF:
6104 reason = "invalid address unspec";
6108 else if (flag_pic && (SYMBOLIC_CONST (disp)
6110 && !machopic_operand_p (disp)
6115 if (TARGET_64BIT && (index || base))
6117 /* foo@dtpoff(%rX) is ok. */
6118 if (GET_CODE (disp) != CONST
6119 || GET_CODE (XEXP (disp, 0)) != PLUS
6120 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6121 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6122 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6123 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6125 reason = "non-constant pic memory reference";
6129 else if (! legitimate_pic_address_disp_p (disp))
6131 reason = "displacement is an invalid pic construct";
6135 /* This code used to verify that a symbolic pic displacement
6136 includes the pic_offset_table_rtx register.
6138 While this is good idea, unfortunately these constructs may
6139 be created by "adds using lea" optimization for incorrect
6148 This code is nonsensical, but results in addressing
6149 GOT table with pic_offset_table_rtx base. We can't
6150 just refuse it easily, since it gets matched by
6151 "addsi3" pattern, that later gets split to lea in the
6152 case output register differs from input. While this
6153 can be handled by separate addsi pattern for this case
6154 that never results in lea, this seems to be easier and
6155 correct fix for crash to disable this test. */
6157 else if (GET_CODE (disp) != LABEL_REF
6158 && GET_CODE (disp) != CONST_INT
6159 && (GET_CODE (disp) != CONST
6160 || !legitimate_constant_p (disp))
6161 && (GET_CODE (disp) != SYMBOL_REF
6162 || !legitimate_constant_p (disp)))
6164 reason = "displacement is not constant";
6167 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6169 reason = "displacement is out of range";
6174 /* Everything looks valid. */
6175 if (TARGET_DEBUG_ADDR)
6176 fprintf (stderr, "Success.\n");
6180 if (TARGET_DEBUG_ADDR)
6182 fprintf (stderr, "Error: %s\n", reason);
6183 debug_rtx (reason_rtx);
6188 /* Return an unique alias set for the GOT. */
6190 static HOST_WIDE_INT
6191 ix86_GOT_alias_set (void)
6193 static HOST_WIDE_INT set = -1;
6195 set = new_alias_set ();
6199 /* Return a legitimate reference for ORIG (an address) using the
6200 register REG. If REG is 0, a new pseudo is generated.
6202 There are two types of references that must be handled:
6204 1. Global data references must load the address from the GOT, via
6205 the PIC reg. An insn is emitted to do this load, and the reg is
6208 2. Static data references, constant pool addresses, and code labels
6209 compute the address as an offset from the GOT, whose base is in
6210 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6211 differentiate them from global data objects. The returned
6212 address is the PIC reg + an unspec constant.
6214 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6215 reg also appears in the address. */
6218 legitimize_pic_address (rtx orig, rtx reg)
6226 reg = gen_reg_rtx (Pmode);
6227 /* Use the generic Mach-O PIC machinery. */
6228 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6231 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6233 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6235 /* This symbol may be referenced via a displacement from the PIC
6236 base address (@GOTOFF). */
6238 if (reload_in_progress)
6239 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6240 if (GET_CODE (addr) == CONST)
6241 addr = XEXP (addr, 0);
6242 if (GET_CODE (addr) == PLUS)
6244 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6245 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6248 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6249 new = gen_rtx_CONST (Pmode, new);
6250 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6254 emit_move_insn (reg, new);
6258 else if (GET_CODE (addr) == SYMBOL_REF)
6262 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6263 new = gen_rtx_CONST (Pmode, new);
6264 new = gen_rtx_MEM (Pmode, new);
6265 RTX_UNCHANGING_P (new) = 1;
6266 set_mem_alias_set (new, ix86_GOT_alias_set ());
6269 reg = gen_reg_rtx (Pmode);
6270 /* Use directly gen_movsi, otherwise the address is loaded
6271 into register for CSE. We don't want to CSE this addresses,
6272 instead we CSE addresses from the GOT table, so skip this. */
6273 emit_insn (gen_movsi (reg, new));
6278 /* This symbol must be referenced via a load from the
6279 Global Offset Table (@GOT). */
6281 if (reload_in_progress)
6282 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6283 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6284 new = gen_rtx_CONST (Pmode, new);
6285 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6286 new = gen_rtx_MEM (Pmode, new);
6287 RTX_UNCHANGING_P (new) = 1;
6288 set_mem_alias_set (new, ix86_GOT_alias_set ());
6291 reg = gen_reg_rtx (Pmode);
6292 emit_move_insn (reg, new);
6298 if (GET_CODE (addr) == CONST)
6300 addr = XEXP (addr, 0);
6302 /* We must match stuff we generate before. Assume the only
6303 unspecs that can get here are ours. Not that we could do
6304 anything with them anyway.... */
6305 if (GET_CODE (addr) == UNSPEC
6306 || (GET_CODE (addr) == PLUS
6307 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6309 if (GET_CODE (addr) != PLUS)
6312 if (GET_CODE (addr) == PLUS)
6314 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6316 /* Check first to see if this is a constant offset from a @GOTOFF
6317 symbol reference. */
6318 if (local_symbolic_operand (op0, Pmode)
6319 && GET_CODE (op1) == CONST_INT)
6323 if (reload_in_progress)
6324 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6325 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6327 new = gen_rtx_PLUS (Pmode, new, op1);
6328 new = gen_rtx_CONST (Pmode, new);
6329 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6333 emit_move_insn (reg, new);
6339 if (INTVAL (op1) < -16*1024*1024
6340 || INTVAL (op1) >= 16*1024*1024)
6341 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6346 base = legitimize_pic_address (XEXP (addr, 0), reg);
6347 new = legitimize_pic_address (XEXP (addr, 1),
6348 base == reg ? NULL_RTX : reg);
6350 if (GET_CODE (new) == CONST_INT)
6351 new = plus_constant (base, INTVAL (new));
6354 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6356 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6357 new = XEXP (new, 1);
6359 new = gen_rtx_PLUS (Pmode, base, new);
6367 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6370 get_thread_pointer (int to_reg)
6374 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6378 reg = gen_reg_rtx (Pmode);
6379 insn = gen_rtx_SET (VOIDmode, reg, tp);
6380 insn = emit_insn (insn);
6385 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6386 false if we expect this to be used for a memory address and true if
6387 we expect to load the address into a register. */
6390 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6392 rtx dest, base, off, pic;
6397 case TLS_MODEL_GLOBAL_DYNAMIC:
6398 dest = gen_reg_rtx (Pmode);
6401 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6404 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6405 insns = get_insns ();
6408 emit_libcall_block (insns, dest, rax, x);
6411 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6414 case TLS_MODEL_LOCAL_DYNAMIC:
6415 base = gen_reg_rtx (Pmode);
6418 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6421 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6422 insns = get_insns ();
6425 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6426 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6427 emit_libcall_block (insns, base, rax, note);
6430 emit_insn (gen_tls_local_dynamic_base_32 (base));
6432 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6433 off = gen_rtx_CONST (Pmode, off);
6435 return gen_rtx_PLUS (Pmode, base, off);
6437 case TLS_MODEL_INITIAL_EXEC:
6441 type = UNSPEC_GOTNTPOFF;
6445 if (reload_in_progress)
6446 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6447 pic = pic_offset_table_rtx;
6448 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6450 else if (!TARGET_GNU_TLS)
6452 pic = gen_reg_rtx (Pmode);
6453 emit_insn (gen_set_got (pic));
6454 type = UNSPEC_GOTTPOFF;
6459 type = UNSPEC_INDNTPOFF;
6462 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6463 off = gen_rtx_CONST (Pmode, off);
6465 off = gen_rtx_PLUS (Pmode, pic, off);
6466 off = gen_rtx_MEM (Pmode, off);
6467 RTX_UNCHANGING_P (off) = 1;
6468 set_mem_alias_set (off, ix86_GOT_alias_set ());
6470 if (TARGET_64BIT || TARGET_GNU_TLS)
6472 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6473 off = force_reg (Pmode, off);
6474 return gen_rtx_PLUS (Pmode, base, off);
6478 base = get_thread_pointer (true);
6479 dest = gen_reg_rtx (Pmode);
6480 emit_insn (gen_subsi3 (dest, base, off));
6484 case TLS_MODEL_LOCAL_EXEC:
6485 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6486 (TARGET_64BIT || TARGET_GNU_TLS)
6487 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6488 off = gen_rtx_CONST (Pmode, off);
6490 if (TARGET_64BIT || TARGET_GNU_TLS)
6492 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6493 return gen_rtx_PLUS (Pmode, base, off);
6497 base = get_thread_pointer (true);
6498 dest = gen_reg_rtx (Pmode);
6499 emit_insn (gen_subsi3 (dest, base, off));
6510 /* Try machine-dependent ways of modifying an illegitimate address
6511 to be legitimate. If we find one, return the new, valid address.
6512 This macro is used in only one place: `memory_address' in explow.c.
6514 OLDX is the address as it was before break_out_memory_refs was called.
6515 In some cases it is useful to look at this to decide what needs to be done.
6517 MODE and WIN are passed so that this macro can use
6518 GO_IF_LEGITIMATE_ADDRESS.
6520 It is always safe for this macro to do nothing. It exists to recognize
6521 opportunities to optimize the output.
6523 For the 80386, we handle X+REG by loading X into a register R and
6524 using R+REG. R will go in a general reg and indexing will be used.
6525 However, if REG is a broken-out memory address or multiplication,
6526 nothing needs to be done because REG can certainly go in a general reg.
6528 When -fpic is used, special handling is needed for symbolic references.
6529 See comments by legitimize_pic_address in i386.c for details. */
6532 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6537 if (TARGET_DEBUG_ADDR)
6539 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6540 GET_MODE_NAME (mode));
6544 log = tls_symbolic_operand (x, mode);
6546 return legitimize_tls_address (x, log, false);
6548 if (flag_pic && SYMBOLIC_CONST (x))
6549 return legitimize_pic_address (x, 0);
6551 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6552 if (GET_CODE (x) == ASHIFT
6553 && GET_CODE (XEXP (x, 1)) == CONST_INT
6554 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6557 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6558 GEN_INT (1 << log));
6561 if (GET_CODE (x) == PLUS)
6563 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6565 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6566 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6567 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6570 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6571 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6572 GEN_INT (1 << log));
6575 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6576 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6577 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6580 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6581 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6582 GEN_INT (1 << log));
6585 /* Put multiply first if it isn't already. */
6586 if (GET_CODE (XEXP (x, 1)) == MULT)
6588 rtx tmp = XEXP (x, 0);
6589 XEXP (x, 0) = XEXP (x, 1);
6594 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6595 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6596 created by virtual register instantiation, register elimination, and
6597 similar optimizations. */
6598 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6601 x = gen_rtx_PLUS (Pmode,
6602 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6603 XEXP (XEXP (x, 1), 0)),
6604 XEXP (XEXP (x, 1), 1));
6608 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6609 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6610 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6611 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6612 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6613 && CONSTANT_P (XEXP (x, 1)))
6616 rtx other = NULL_RTX;
6618 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6620 constant = XEXP (x, 1);
6621 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6623 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6625 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6626 other = XEXP (x, 1);
6634 x = gen_rtx_PLUS (Pmode,
6635 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6636 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6637 plus_constant (other, INTVAL (constant)));
6641 if (changed && legitimate_address_p (mode, x, FALSE))
6644 if (GET_CODE (XEXP (x, 0)) == MULT)
6647 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6650 if (GET_CODE (XEXP (x, 1)) == MULT)
6653 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6657 && GET_CODE (XEXP (x, 1)) == REG
6658 && GET_CODE (XEXP (x, 0)) == REG)
6661 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6664 x = legitimize_pic_address (x, 0);
6667 if (changed && legitimate_address_p (mode, x, FALSE))
6670 if (GET_CODE (XEXP (x, 0)) == REG)
6672 rtx temp = gen_reg_rtx (Pmode);
6673 rtx val = force_operand (XEXP (x, 1), temp);
6675 emit_move_insn (temp, val);
6681 else if (GET_CODE (XEXP (x, 1)) == REG)
6683 rtx temp = gen_reg_rtx (Pmode);
6684 rtx val = force_operand (XEXP (x, 0), temp);
6686 emit_move_insn (temp, val);
6696 /* Print an integer constant expression in assembler syntax. Addition
6697 and subtraction are the only arithmetic that may appear in these
6698 expressions. FILE is the stdio stream to write to, X is the rtx, and
6699 CODE is the operand print code from the output string. */
6702 output_pic_addr_const (FILE *file, rtx x, int code)
6706 switch (GET_CODE (x))
6716 assemble_name (file, XSTR (x, 0));
6717 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6718 fputs ("@PLT", file);
6725 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6726 assemble_name (asm_out_file, buf);
6730 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6734 /* This used to output parentheses around the expression,
6735 but that does not work on the 386 (either ATT or BSD assembler). */
6736 output_pic_addr_const (file, XEXP (x, 0), code);
6740 if (GET_MODE (x) == VOIDmode)
6742 /* We can use %d if the number is <32 bits and positive. */
6743 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6744 fprintf (file, "0x%lx%08lx",
6745 (unsigned long) CONST_DOUBLE_HIGH (x),
6746 (unsigned long) CONST_DOUBLE_LOW (x));
6748 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6751 /* We can't handle floating point constants;
6752 PRINT_OPERAND must handle them. */
6753 output_operand_lossage ("floating constant misused");
6757 /* Some assemblers need integer constants to appear first. */
6758 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6760 output_pic_addr_const (file, XEXP (x, 0), code);
6762 output_pic_addr_const (file, XEXP (x, 1), code);
6764 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6766 output_pic_addr_const (file, XEXP (x, 1), code);
6768 output_pic_addr_const (file, XEXP (x, 0), code);
6776 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6777 output_pic_addr_const (file, XEXP (x, 0), code);
6779 output_pic_addr_const (file, XEXP (x, 1), code);
6781 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6785 if (XVECLEN (x, 0) != 1)
6787 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6788 switch (XINT (x, 1))
6791 fputs ("@GOT", file);
6794 fputs ("@GOTOFF", file);
6796 case UNSPEC_GOTPCREL:
6797 fputs ("@GOTPCREL(%rip)", file);
6799 case UNSPEC_GOTTPOFF:
6800 /* FIXME: This might be @TPOFF in Sun ld too. */
6801 fputs ("@GOTTPOFF", file);
6804 fputs ("@TPOFF", file);
6808 fputs ("@TPOFF", file);
6810 fputs ("@NTPOFF", file);
6813 fputs ("@DTPOFF", file);
6815 case UNSPEC_GOTNTPOFF:
6817 fputs ("@GOTTPOFF(%rip)", file);
6819 fputs ("@GOTNTPOFF", file);
6821 case UNSPEC_INDNTPOFF:
6822 fputs ("@INDNTPOFF", file);
6825 output_operand_lossage ("invalid UNSPEC as operand");
6831 output_operand_lossage ("invalid expression as operand");
6835 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6836 We need to handle our special PIC relocations. */
6839 i386_dwarf_output_addr_const (FILE *file, rtx x)
6842 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6846 fprintf (file, "%s", ASM_LONG);
6849 output_pic_addr_const (file, x, '\0');
6851 output_addr_const (file, x);
6855 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6856 We need to emit DTP-relative relocations. */
6859 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6861 fputs (ASM_LONG, file);
6862 output_addr_const (file, x);
6863 fputs ("@DTPOFF", file);
6869 fputs (", 0", file);
6876 /* In the name of slightly smaller debug output, and to cater to
6877 general assembler losage, recognize PIC+GOTOFF and turn it back
6878 into a direct symbol reference. */
6881 ix86_delegitimize_address (rtx orig_x)
6885 if (GET_CODE (x) == MEM)
6890 if (GET_CODE (x) != CONST
6891 || GET_CODE (XEXP (x, 0)) != UNSPEC
6892 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6893 || GET_CODE (orig_x) != MEM)
6895 return XVECEXP (XEXP (x, 0), 0, 0);
6898 if (GET_CODE (x) != PLUS
6899 || GET_CODE (XEXP (x, 1)) != CONST)
6902 if (GET_CODE (XEXP (x, 0)) == REG
6903 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6904 /* %ebx + GOT/GOTOFF */
6906 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6908 /* %ebx + %reg * scale + GOT/GOTOFF */
6910 if (GET_CODE (XEXP (y, 0)) == REG
6911 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6913 else if (GET_CODE (XEXP (y, 1)) == REG
6914 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6918 if (GET_CODE (y) != REG
6919 && GET_CODE (y) != MULT
6920 && GET_CODE (y) != ASHIFT)
6926 x = XEXP (XEXP (x, 1), 0);
6927 if (GET_CODE (x) == UNSPEC
6928 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6929 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6932 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6933 return XVECEXP (x, 0, 0);
6936 if (GET_CODE (x) == PLUS
6937 && GET_CODE (XEXP (x, 0)) == UNSPEC
6938 && GET_CODE (XEXP (x, 1)) == CONST_INT
6939 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6940 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6941 && GET_CODE (orig_x) != MEM)))
6943 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6945 return gen_rtx_PLUS (Pmode, y, x);
6953 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6958 if (mode == CCFPmode || mode == CCFPUmode)
6960 enum rtx_code second_code, bypass_code;
6961 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6962 if (bypass_code != NIL || second_code != NIL)
6964 code = ix86_fp_compare_code_to_integer (code);
6968 code = reverse_condition (code);
6979 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6984 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6985 Those same assemblers have the same but opposite losage on cmov. */
6988 suffix = fp ? "nbe" : "a";
6991 if (mode == CCNOmode || mode == CCGOCmode)
6993 else if (mode == CCmode || mode == CCGCmode)
7004 if (mode == CCNOmode || mode == CCGOCmode)
7006 else if (mode == CCmode || mode == CCGCmode)
7015 suffix = fp ? "nb" : "ae";
7018 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7028 suffix = fp ? "u" : "p";
7031 suffix = fp ? "nu" : "np";
7036 fputs (suffix, file);
7039 /* Print the name of register X to FILE based on its machine mode and number.
7040 If CODE is 'w', pretend the mode is HImode.
7041 If CODE is 'b', pretend the mode is QImode.
7042 If CODE is 'k', pretend the mode is SImode.
7043 If CODE is 'q', pretend the mode is DImode.
7044 If CODE is 'h', pretend the reg is the `high' byte register.
7045 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7048 print_reg (rtx x, int code, FILE *file)
7050 if (REGNO (x) == ARG_POINTER_REGNUM
7051 || REGNO (x) == FRAME_POINTER_REGNUM
7052 || REGNO (x) == FLAGS_REG
7053 || REGNO (x) == FPSR_REG)
7056 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7059 if (code == 'w' || MMX_REG_P (x))
7061 else if (code == 'b')
7063 else if (code == 'k')
7065 else if (code == 'q')
7067 else if (code == 'y')
7069 else if (code == 'h')
7072 code = GET_MODE_SIZE (GET_MODE (x));
7074 /* Irritatingly, AMD extended registers use different naming convention
7075 from the normal registers. */
7076 if (REX_INT_REG_P (x))
7083 error ("extended registers have no high halves");
7086 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7089 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7092 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7095 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7098 error ("unsupported operand size for extended register");
7106 if (STACK_TOP_P (x))
7108 fputs ("st(0)", file);
7115 if (! ANY_FP_REG_P (x))
7116 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7121 fputs (hi_reg_name[REGNO (x)], file);
7124 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7126 fputs (qi_reg_name[REGNO (x)], file);
7129 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7131 fputs (qi_high_reg_name[REGNO (x)], file);
7138 /* Locate some local-dynamic symbol still in use by this function
7139 so that we can print its name in some tls_local_dynamic_base
7143 get_some_local_dynamic_name (void)
7147 if (cfun->machine->some_ld_name)
7148 return cfun->machine->some_ld_name;
7150 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7152 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7153 return cfun->machine->some_ld_name;
7159 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7163 if (GET_CODE (x) == SYMBOL_REF
7164 && local_dynamic_symbolic_operand (x, Pmode))
7166 cfun->machine->some_ld_name = XSTR (x, 0);
7174 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7175 C -- print opcode suffix for set/cmov insn.
7176 c -- like C, but print reversed condition
7177 F,f -- likewise, but for floating-point.
7178 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7180 R -- print the prefix for register names.
7181 z -- print the opcode suffix for the size of the current operand.
7182 * -- print a star (in certain assembler syntax)
7183 A -- print an absolute memory reference.
7184 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7185 s -- print a shift double count, followed by the assemblers argument
7187 b -- print the QImode name of the register for the indicated operand.
7188 %b0 would print %al if operands[0] is reg 0.
7189 w -- likewise, print the HImode name of the register.
7190 k -- likewise, print the SImode name of the register.
7191 q -- likewise, print the DImode name of the register.
7192 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7193 y -- print "st(0)" instead of "st" as a register.
7194 D -- print condition for SSE cmp instruction.
7195 P -- if PIC, print an @PLT suffix.
7196 X -- don't print any sort of PIC '@' suffix for a symbol.
7197 & -- print some in-use local-dynamic symbol name.
7201 print_operand (FILE *file, rtx x, int code)
7208 if (ASSEMBLER_DIALECT == ASM_ATT)
7213 assemble_name (file, get_some_local_dynamic_name ());
7217 if (ASSEMBLER_DIALECT == ASM_ATT)
7219 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7221 /* Intel syntax. For absolute addresses, registers should not
7222 be surrounded by braces. */
7223 if (GET_CODE (x) != REG)
7226 PRINT_OPERAND (file, x, 0);
7234 PRINT_OPERAND (file, x, 0);
7239 if (ASSEMBLER_DIALECT == ASM_ATT)
7244 if (ASSEMBLER_DIALECT == ASM_ATT)
7249 if (ASSEMBLER_DIALECT == ASM_ATT)
7254 if (ASSEMBLER_DIALECT == ASM_ATT)
7259 if (ASSEMBLER_DIALECT == ASM_ATT)
7264 if (ASSEMBLER_DIALECT == ASM_ATT)
7269 /* 387 opcodes don't get size suffixes if the operands are
7271 if (STACK_REG_P (x))
7274 /* Likewise if using Intel opcodes. */
7275 if (ASSEMBLER_DIALECT == ASM_INTEL)
7278 /* This is the size of op from size of operand. */
7279 switch (GET_MODE_SIZE (GET_MODE (x)))
7282 #ifdef HAVE_GAS_FILDS_FISTS
7288 if (GET_MODE (x) == SFmode)
7303 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7305 #ifdef GAS_MNEMONICS
7331 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7333 PRINT_OPERAND (file, x, 0);
7339 /* Little bit of braindamage here. The SSE compare instructions
7340 does use completely different names for the comparisons that the
7341 fp conditional moves. */
7342 switch (GET_CODE (x))
7357 fputs ("unord", file);
7361 fputs ("neq", file);
7365 fputs ("nlt", file);
7369 fputs ("nle", file);
7372 fputs ("ord", file);
7380 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7381 if (ASSEMBLER_DIALECT == ASM_ATT)
7383 switch (GET_MODE (x))
7385 case HImode: putc ('w', file); break;
7387 case SFmode: putc ('l', file); break;
7389 case DFmode: putc ('q', file); break;
7397 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7400 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7401 if (ASSEMBLER_DIALECT == ASM_ATT)
7404 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7407 /* Like above, but reverse condition */
7409 /* Check to see if argument to %c is really a constant
7410 and not a condition code which needs to be reversed. */
7411 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7413 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7416 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7419 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7420 if (ASSEMBLER_DIALECT == ASM_ATT)
7423 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7429 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7432 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7435 int pred_val = INTVAL (XEXP (x, 0));
7437 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7438 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7440 int taken = pred_val > REG_BR_PROB_BASE / 2;
7441 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7443 /* Emit hints only in the case default branch prediction
7444 heuristics would fail. */
7445 if (taken != cputaken)
7447 /* We use 3e (DS) prefix for taken branches and
7448 2e (CS) prefix for not taken branches. */
7450 fputs ("ds ; ", file);
7452 fputs ("cs ; ", file);
7459 output_operand_lossage ("invalid operand code `%c'", code);
7463 if (GET_CODE (x) == REG)
7464 print_reg (x, code, file);
7466 else if (GET_CODE (x) == MEM)
7468 /* No `byte ptr' prefix for call instructions. */
7469 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7472 switch (GET_MODE_SIZE (GET_MODE (x)))
7474 case 1: size = "BYTE"; break;
7475 case 2: size = "WORD"; break;
7476 case 4: size = "DWORD"; break;
7477 case 8: size = "QWORD"; break;
7478 case 12: size = "XWORD"; break;
7479 case 16: size = "XMMWORD"; break;
7484 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7487 else if (code == 'w')
7489 else if (code == 'k')
7493 fputs (" PTR ", file);
7497 /* Avoid (%rip) for call operands. */
7498 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7499 && GET_CODE (x) != CONST_INT)
7500 output_addr_const (file, x);
7501 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7502 output_operand_lossage ("invalid constraints for operand");
7507 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7512 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7513 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7515 if (ASSEMBLER_DIALECT == ASM_ATT)
7517 fprintf (file, "0x%08lx", l);
7520 /* These float cases don't actually occur as immediate operands. */
7521 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7525 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7526 fprintf (file, "%s", dstr);
7529 else if (GET_CODE (x) == CONST_DOUBLE
7530 && GET_MODE (x) == XFmode)
7534 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7535 fprintf (file, "%s", dstr);
7542 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7544 if (ASSEMBLER_DIALECT == ASM_ATT)
7547 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7548 || GET_CODE (x) == LABEL_REF)
7550 if (ASSEMBLER_DIALECT == ASM_ATT)
7553 fputs ("OFFSET FLAT:", file);
7556 if (GET_CODE (x) == CONST_INT)
7557 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7559 output_pic_addr_const (file, x, code);
7561 output_addr_const (file, x);
7565 /* Print a memory operand whose address is ADDR. */
7568 print_operand_address (FILE *file, rtx addr)
7570 struct ix86_address parts;
7571 rtx base, index, disp;
7574 if (! ix86_decompose_address (addr, &parts))
7578 index = parts.index;
7580 scale = parts.scale;
7588 if (USER_LABEL_PREFIX[0] == 0)
7590 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7596 if (!base && !index)
7598 /* Displacement only requires special attention. */
7600 if (GET_CODE (disp) == CONST_INT)
7602 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7604 if (USER_LABEL_PREFIX[0] == 0)
7606 fputs ("ds:", file);
7608 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7611 output_pic_addr_const (file, disp, 0);
7613 output_addr_const (file, disp);
7615 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7617 && ((GET_CODE (disp) == SYMBOL_REF
7618 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7619 || GET_CODE (disp) == LABEL_REF
7620 || (GET_CODE (disp) == CONST
7621 && GET_CODE (XEXP (disp, 0)) == PLUS
7622 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7623 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7624 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7625 fputs ("(%rip)", file);
7629 if (ASSEMBLER_DIALECT == ASM_ATT)
7634 output_pic_addr_const (file, disp, 0);
7635 else if (GET_CODE (disp) == LABEL_REF)
7636 output_asm_label (disp);
7638 output_addr_const (file, disp);
7643 print_reg (base, 0, file);
7647 print_reg (index, 0, file);
7649 fprintf (file, ",%d", scale);
7655 rtx offset = NULL_RTX;
7659 /* Pull out the offset of a symbol; print any symbol itself. */
7660 if (GET_CODE (disp) == CONST
7661 && GET_CODE (XEXP (disp, 0)) == PLUS
7662 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7664 offset = XEXP (XEXP (disp, 0), 1);
7665 disp = gen_rtx_CONST (VOIDmode,
7666 XEXP (XEXP (disp, 0), 0));
7670 output_pic_addr_const (file, disp, 0);
7671 else if (GET_CODE (disp) == LABEL_REF)
7672 output_asm_label (disp);
7673 else if (GET_CODE (disp) == CONST_INT)
7676 output_addr_const (file, disp);
7682 print_reg (base, 0, file);
7685 if (INTVAL (offset) >= 0)
7687 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7691 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7698 print_reg (index, 0, file);
7700 fprintf (file, "*%d", scale);
7708 output_addr_const_extra (FILE *file, rtx x)
7712 if (GET_CODE (x) != UNSPEC)
7715 op = XVECEXP (x, 0, 0);
7716 switch (XINT (x, 1))
7718 case UNSPEC_GOTTPOFF:
7719 output_addr_const (file, op);
7720 /* FIXME: This might be @TPOFF in Sun ld. */
7721 fputs ("@GOTTPOFF", file);
7724 output_addr_const (file, op);
7725 fputs ("@TPOFF", file);
7728 output_addr_const (file, op);
7730 fputs ("@TPOFF", file);
7732 fputs ("@NTPOFF", file);
7735 output_addr_const (file, op);
7736 fputs ("@DTPOFF", file);
7738 case UNSPEC_GOTNTPOFF:
7739 output_addr_const (file, op);
7741 fputs ("@GOTTPOFF(%rip)", file);
7743 fputs ("@GOTNTPOFF", file);
7745 case UNSPEC_INDNTPOFF:
7746 output_addr_const (file, op);
7747 fputs ("@INDNTPOFF", file);
7757 /* Split one or more DImode RTL references into pairs of SImode
7758 references. The RTL can be REG, offsettable MEM, integer constant, or
7759 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7760 split and "num" is its length. lo_half and hi_half are output arrays
7761 that parallel "operands". */
7764 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7768 rtx op = operands[num];
7770 /* simplify_subreg refuse to split volatile memory addresses,
7771 but we still have to handle it. */
7772 if (GET_CODE (op) == MEM)
7774 lo_half[num] = adjust_address (op, SImode, 0);
7775 hi_half[num] = adjust_address (op, SImode, 4);
7779 lo_half[num] = simplify_gen_subreg (SImode, op,
7780 GET_MODE (op) == VOIDmode
7781 ? DImode : GET_MODE (op), 0);
7782 hi_half[num] = simplify_gen_subreg (SImode, op,
7783 GET_MODE (op) == VOIDmode
7784 ? DImode : GET_MODE (op), 4);
7788 /* Split one or more TImode RTL references into pairs of SImode
7789 references. The RTL can be REG, offsettable MEM, integer constant, or
7790 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7791 split and "num" is its length. lo_half and hi_half are output arrays
7792 that parallel "operands". */
7795 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7799 rtx op = operands[num];
7801 /* simplify_subreg refuse to split volatile memory addresses, but we
7802 still have to handle it. */
7803 if (GET_CODE (op) == MEM)
7805 lo_half[num] = adjust_address (op, DImode, 0);
7806 hi_half[num] = adjust_address (op, DImode, 8);
7810 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7811 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7816 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7817 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7818 is the expression of the binary operation. The output may either be
7819 emitted here, or returned to the caller, like all output_* functions.
7821 There is no guarantee that the operands are the same mode, as they
7822 might be within FLOAT or FLOAT_EXTEND expressions. */
7824 #ifndef SYSV386_COMPAT
7825 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7826 wants to fix the assemblers because that causes incompatibility
7827 with gcc. No-one wants to fix gcc because that causes
7828 incompatibility with assemblers... You can use the option of
7829 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7830 #define SYSV386_COMPAT 1
7834 output_387_binary_op (rtx insn, rtx *operands)
7836 static char buf[30];
7839 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7841 #ifdef ENABLE_CHECKING
7842 /* Even if we do not want to check the inputs, this documents input
7843 constraints. Which helps in understanding the following code. */
7844 if (STACK_REG_P (operands[0])
7845 && ((REG_P (operands[1])
7846 && REGNO (operands[0]) == REGNO (operands[1])
7847 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7848 || (REG_P (operands[2])
7849 && REGNO (operands[0]) == REGNO (operands[2])
7850 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7851 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7857 switch (GET_CODE (operands[3]))
7860 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7861 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7869 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7870 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7878 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7879 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7887 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7888 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7902 if (GET_MODE (operands[0]) == SFmode)
7903 strcat (buf, "ss\t{%2, %0|%0, %2}");
7905 strcat (buf, "sd\t{%2, %0|%0, %2}");
7910 switch (GET_CODE (operands[3]))
7914 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7916 rtx temp = operands[2];
7917 operands[2] = operands[1];
7921 /* know operands[0] == operands[1]. */
7923 if (GET_CODE (operands[2]) == MEM)
7929 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7931 if (STACK_TOP_P (operands[0]))
7932 /* How is it that we are storing to a dead operand[2]?
7933 Well, presumably operands[1] is dead too. We can't
7934 store the result to st(0) as st(0) gets popped on this
7935 instruction. Instead store to operands[2] (which I
7936 think has to be st(1)). st(1) will be popped later.
7937 gcc <= 2.8.1 didn't have this check and generated
7938 assembly code that the Unixware assembler rejected. */
7939 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7941 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7945 if (STACK_TOP_P (operands[0]))
7946 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7948 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7953 if (GET_CODE (operands[1]) == MEM)
7959 if (GET_CODE (operands[2]) == MEM)
7965 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7968 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7969 derived assemblers, confusingly reverse the direction of
7970 the operation for fsub{r} and fdiv{r} when the
7971 destination register is not st(0). The Intel assembler
7972 doesn't have this brain damage. Read !SYSV386_COMPAT to
7973 figure out what the hardware really does. */
7974 if (STACK_TOP_P (operands[0]))
7975 p = "{p\t%0, %2|rp\t%2, %0}";
7977 p = "{rp\t%2, %0|p\t%0, %2}";
7979 if (STACK_TOP_P (operands[0]))
7980 /* As above for fmul/fadd, we can't store to st(0). */
7981 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7983 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7988 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7991 if (STACK_TOP_P (operands[0]))
7992 p = "{rp\t%0, %1|p\t%1, %0}";
7994 p = "{p\t%1, %0|rp\t%0, %1}";
7996 if (STACK_TOP_P (operands[0]))
7997 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7999 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8004 if (STACK_TOP_P (operands[0]))
8006 if (STACK_TOP_P (operands[1]))
8007 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8009 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8012 else if (STACK_TOP_P (operands[1]))
8015 p = "{\t%1, %0|r\t%0, %1}";
8017 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8023 p = "{r\t%2, %0|\t%0, %2}";
8025 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8038 /* Output code to initialize control word copies used by
8039 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8040 is set to control word rounding downwards. */
8042 emit_i387_cw_initialization (rtx normal, rtx round_down)
8044 rtx reg = gen_reg_rtx (HImode);
8046 emit_insn (gen_x86_fnstcw_1 (normal));
8047 emit_move_insn (reg, normal);
8048 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8050 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8052 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8053 emit_move_insn (round_down, reg);
8056 /* Output code for INSN to convert a float to a signed int. OPERANDS
8057 are the insn operands. The output may be [HSD]Imode and the input
8058 operand may be [SDX]Fmode. */
8061 output_fix_trunc (rtx insn, rtx *operands)
8063 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8064 int dimode_p = GET_MODE (operands[0]) == DImode;
8066 /* Jump through a hoop or two for DImode, since the hardware has no
8067 non-popping instruction. We used to do this a different way, but
8068 that was somewhat fragile and broke with post-reload splitters. */
8069 if (dimode_p && !stack_top_dies)
8070 output_asm_insn ("fld\t%y1", operands);
8072 if (!STACK_TOP_P (operands[1]))
8075 if (GET_CODE (operands[0]) != MEM)
8078 output_asm_insn ("fldcw\t%3", operands);
8079 if (stack_top_dies || dimode_p)
8080 output_asm_insn ("fistp%z0\t%0", operands);
8082 output_asm_insn ("fist%z0\t%0", operands);
8083 output_asm_insn ("fldcw\t%2", operands);
8088 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8089 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8090 when fucom should be used. */
8093 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8096 rtx cmp_op0 = operands[0];
8097 rtx cmp_op1 = operands[1];
8098 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8103 cmp_op1 = operands[2];
8107 if (GET_MODE (operands[0]) == SFmode)
8109 return "ucomiss\t{%1, %0|%0, %1}";
8111 return "comiss\t{%1, %0|%0, %1}";
8114 return "ucomisd\t{%1, %0|%0, %1}";
8116 return "comisd\t{%1, %0|%0, %1}";
8119 if (! STACK_TOP_P (cmp_op0))
8122 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8124 if (STACK_REG_P (cmp_op1)
8126 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8127 && REGNO (cmp_op1) != FIRST_STACK_REG)
8129 /* If both the top of the 387 stack dies, and the other operand
8130 is also a stack register that dies, then this must be a
8131 `fcompp' float compare */
8135 /* There is no double popping fcomi variant. Fortunately,
8136 eflags is immune from the fstp's cc clobbering. */
8138 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8140 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8148 return "fucompp\n\tfnstsw\t%0";
8150 return "fcompp\n\tfnstsw\t%0";
8163 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8165 static const char * const alt[24] =
8177 "fcomi\t{%y1, %0|%0, %y1}",
8178 "fcomip\t{%y1, %0|%0, %y1}",
8179 "fucomi\t{%y1, %0|%0, %y1}",
8180 "fucomip\t{%y1, %0|%0, %y1}",
8187 "fcom%z2\t%y2\n\tfnstsw\t%0",
8188 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8189 "fucom%z2\t%y2\n\tfnstsw\t%0",
8190 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8192 "ficom%z2\t%y2\n\tfnstsw\t%0",
8193 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8201 mask = eflags_p << 3;
8202 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8203 mask |= unordered_p << 1;
8204 mask |= stack_top_dies;
8217 ix86_output_addr_vec_elt (FILE *file, int value)
8219 const char *directive = ASM_LONG;
8224 directive = ASM_QUAD;
8230 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8234 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8237 fprintf (file, "%s%s%d-%s%d\n",
8238 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8239 else if (HAVE_AS_GOTOFF_IN_DATA)
8240 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8242 else if (TARGET_MACHO)
8244 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8245 machopic_output_function_base_name (file);
8246 fprintf(file, "\n");
8250 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8251 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8254 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8258 ix86_expand_clear (rtx dest)
8262 /* We play register width games, which are only valid after reload. */
8263 if (!reload_completed)
8266 /* Avoid HImode and its attendant prefix byte. */
8267 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8268 dest = gen_rtx_REG (SImode, REGNO (dest));
8270 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8272 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8273 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8275 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8276 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8282 /* X is an unchanging MEM. If it is a constant pool reference, return
8283 the constant pool rtx, else NULL. */
8286 maybe_get_pool_constant (rtx x)
8288 x = ix86_delegitimize_address (XEXP (x, 0));
8290 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8291 return get_pool_constant (x);
8297 ix86_expand_move (enum machine_mode mode, rtx operands[])
8299 int strict = (reload_in_progress || reload_completed);
8301 enum tls_model model;
8306 model = tls_symbolic_operand (op1, Pmode);
8309 op1 = legitimize_tls_address (op1, model, true);
8310 op1 = force_operand (op1, op0);
8315 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8320 rtx temp = ((reload_in_progress
8321 || ((op0 && GET_CODE (op0) == REG)
8323 ? op0 : gen_reg_rtx (Pmode));
8324 op1 = machopic_indirect_data_reference (op1, temp);
8325 op1 = machopic_legitimize_pic_address (op1, mode,
8326 temp == op1 ? 0 : temp);
8328 else if (MACHOPIC_INDIRECT)
8329 op1 = machopic_indirect_data_reference (op1, 0);
8333 if (GET_CODE (op0) == MEM)
8334 op1 = force_reg (Pmode, op1);
8338 if (GET_CODE (temp) != REG)
8339 temp = gen_reg_rtx (Pmode);
8340 temp = legitimize_pic_address (op1, temp);
8345 #endif /* TARGET_MACHO */
8349 if (GET_CODE (op0) == MEM
8350 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8351 || !push_operand (op0, mode))
8352 && GET_CODE (op1) == MEM)
8353 op1 = force_reg (mode, op1);
8355 if (push_operand (op0, mode)
8356 && ! general_no_elim_operand (op1, mode))
8357 op1 = copy_to_mode_reg (mode, op1);
8359 /* Force large constants in 64bit compilation into register
8360 to get them CSEed. */
8361 if (TARGET_64BIT && mode == DImode
8362 && immediate_operand (op1, mode)
8363 && !x86_64_zero_extended_value (op1)
8364 && !register_operand (op0, mode)
8365 && optimize && !reload_completed && !reload_in_progress)
8366 op1 = copy_to_mode_reg (mode, op1);
8368 if (FLOAT_MODE_P (mode))
8370 /* If we are loading a floating point constant to a register,
8371 force the value to memory now, since we'll get better code
8372 out the back end. */
8376 else if (GET_CODE (op1) == CONST_DOUBLE)
8378 op1 = validize_mem (force_const_mem (mode, op1));
8379 if (!register_operand (op0, mode))
8381 rtx temp = gen_reg_rtx (mode);
8382 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8383 emit_move_insn (op0, temp);
8390 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8394 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8396 /* Force constants other than zero into memory. We do not know how
8397 the instructions used to build constants modify the upper 64 bits
8398 of the register, once we have that information we may be able
8399 to handle some of them more efficiently. */
8400 if ((reload_in_progress | reload_completed) == 0
8401 && register_operand (operands[0], mode)
8402 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8403 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8405 /* Make operand1 a register if it isn't already. */
8407 && !register_operand (operands[0], mode)
8408 && !register_operand (operands[1], mode))
8410 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8411 emit_move_insn (operands[0], temp);
8415 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8418 /* Attempt to expand a binary operator. Make the expansion closer to the
8419 actual machine, then just general_operand, which will allow 3 separate
8420 memory references (one output, two input) in a single insn. */
8423 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8426 int matching_memory;
8427 rtx src1, src2, dst, op, clob;
8433 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8434 if (GET_RTX_CLASS (code) == 'c'
8435 && (rtx_equal_p (dst, src2)
8436 || immediate_operand (src1, mode)))
8443 /* If the destination is memory, and we do not have matching source
8444 operands, do things in registers. */
8445 matching_memory = 0;
8446 if (GET_CODE (dst) == MEM)
8448 if (rtx_equal_p (dst, src1))
8449 matching_memory = 1;
8450 else if (GET_RTX_CLASS (code) == 'c'
8451 && rtx_equal_p (dst, src2))
8452 matching_memory = 2;
8454 dst = gen_reg_rtx (mode);
8457 /* Both source operands cannot be in memory. */
8458 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8460 if (matching_memory != 2)
8461 src2 = force_reg (mode, src2);
8463 src1 = force_reg (mode, src1);
8466 /* If the operation is not commutable, source 1 cannot be a constant
8467 or non-matching memory. */
8468 if ((CONSTANT_P (src1)
8469 || (!matching_memory && GET_CODE (src1) == MEM))
8470 && GET_RTX_CLASS (code) != 'c')
8471 src1 = force_reg (mode, src1);
8473 /* If optimizing, copy to regs to improve CSE */
8474 if (optimize && ! no_new_pseudos)
8476 if (GET_CODE (dst) == MEM)
8477 dst = gen_reg_rtx (mode);
8478 if (GET_CODE (src1) == MEM)
8479 src1 = force_reg (mode, src1);
8480 if (GET_CODE (src2) == MEM)
8481 src2 = force_reg (mode, src2);
8484 /* Emit the instruction. */
8486 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8487 if (reload_in_progress)
8489 /* Reload doesn't know about the flags register, and doesn't know that
8490 it doesn't want to clobber it. We can only do this with PLUS. */
8497 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8498 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8501 /* Fix up the destination if needed. */
8502 if (dst != operands[0])
8503 emit_move_insn (operands[0], dst);
8506 /* Return TRUE or FALSE depending on whether the binary operator meets the
8507 appropriate constraints. */
8510 ix86_binary_operator_ok (enum rtx_code code,
8511 enum machine_mode mode ATTRIBUTE_UNUSED,
8514 /* Both source operands cannot be in memory. */
8515 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8517 /* If the operation is not commutable, source 1 cannot be a constant. */
8518 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8520 /* If the destination is memory, we must have a matching source operand. */
8521 if (GET_CODE (operands[0]) == MEM
8522 && ! (rtx_equal_p (operands[0], operands[1])
8523 || (GET_RTX_CLASS (code) == 'c'
8524 && rtx_equal_p (operands[0], operands[2]))))
8526 /* If the operation is not commutable and the source 1 is memory, we must
8527 have a matching destination. */
8528 if (GET_CODE (operands[1]) == MEM
8529 && GET_RTX_CLASS (code) != 'c'
8530 && ! rtx_equal_p (operands[0], operands[1]))
8535 /* Attempt to expand a unary operator. Make the expansion closer to the
8536 actual machine, then just general_operand, which will allow 2 separate
8537 memory references (one output, one input) in a single insn. */
8540 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8543 int matching_memory;
8544 rtx src, dst, op, clob;
8549 /* If the destination is memory, and we do not have matching source
8550 operands, do things in registers. */
8551 matching_memory = 0;
8552 if (GET_CODE (dst) == MEM)
8554 if (rtx_equal_p (dst, src))
8555 matching_memory = 1;
8557 dst = gen_reg_rtx (mode);
8560 /* When source operand is memory, destination must match. */
8561 if (!matching_memory && GET_CODE (src) == MEM)
8562 src = force_reg (mode, src);
8564 /* If optimizing, copy to regs to improve CSE */
8565 if (optimize && ! no_new_pseudos)
8567 if (GET_CODE (dst) == MEM)
8568 dst = gen_reg_rtx (mode);
8569 if (GET_CODE (src) == MEM)
8570 src = force_reg (mode, src);
8573 /* Emit the instruction. */
8575 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8576 if (reload_in_progress || code == NOT)
8578 /* Reload doesn't know about the flags register, and doesn't know that
8579 it doesn't want to clobber it. */
8586 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8587 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8590 /* Fix up the destination if needed. */
8591 if (dst != operands[0])
8592 emit_move_insn (operands[0], dst);
8595 /* Return TRUE or FALSE depending on whether the unary operator meets the
8596 appropriate constraints. */
8599 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8600 enum machine_mode mode ATTRIBUTE_UNUSED,
8601 rtx operands[2] ATTRIBUTE_UNUSED)
8603 /* If one of operands is memory, source and destination must match. */
8604 if ((GET_CODE (operands[0]) == MEM
8605 || GET_CODE (operands[1]) == MEM)
8606 && ! rtx_equal_p (operands[0], operands[1]))
8611 /* Return TRUE or FALSE depending on whether the first SET in INSN
8612 has source and destination with matching CC modes, and that the
8613 CC mode is at least as constrained as REQ_MODE. */
8616 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8619 enum machine_mode set_mode;
8621 set = PATTERN (insn);
8622 if (GET_CODE (set) == PARALLEL)
8623 set = XVECEXP (set, 0, 0);
8624 if (GET_CODE (set) != SET)
8626 if (GET_CODE (SET_SRC (set)) != COMPARE)
8629 set_mode = GET_MODE (SET_DEST (set));
8633 if (req_mode != CCNOmode
8634 && (req_mode != CCmode
8635 || XEXP (SET_SRC (set), 1) != const0_rtx))
8639 if (req_mode == CCGCmode)
8643 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8647 if (req_mode == CCZmode)
8657 return (GET_MODE (SET_SRC (set)) == set_mode);
8660 /* Generate insn patterns to do an integer compare of OPERANDS. */
8663 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8665 enum machine_mode cmpmode;
8668 cmpmode = SELECT_CC_MODE (code, op0, op1);
8669 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8671 /* This is very simple, but making the interface the same as in the
8672 FP case makes the rest of the code easier. */
8673 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8674 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8676 /* Return the test that should be put into the flags user, i.e.
8677 the bcc, scc, or cmov instruction. */
8678 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8681 /* Figure out whether to use ordered or unordered fp comparisons.
8682 Return the appropriate mode to use. */
8685 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8687 /* ??? In order to make all comparisons reversible, we do all comparisons
8688 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8689 all forms trapping and nontrapping comparisons, we can make inequality
8690 comparisons trapping again, since it results in better code when using
8691 FCOM based compares. */
8692 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8696 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8698 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8699 return ix86_fp_compare_mode (code);
8702 /* Only zero flag is needed. */
8704 case NE: /* ZF!=0 */
8706 /* Codes needing carry flag. */
8707 case GEU: /* CF=0 */
8708 case GTU: /* CF=0 & ZF=0 */
8709 case LTU: /* CF=1 */
8710 case LEU: /* CF=1 | ZF=1 */
8712 /* Codes possibly doable only with sign flag when
8713 comparing against zero. */
8714 case GE: /* SF=OF or SF=0 */
8715 case LT: /* SF<>OF or SF=1 */
8716 if (op1 == const0_rtx)
8719 /* For other cases Carry flag is not required. */
8721 /* Codes doable only with sign flag when comparing
8722 against zero, but we miss jump instruction for it
8723 so we need to use relational tests against overflow
8724 that thus needs to be zero. */
8725 case GT: /* ZF=0 & SF=OF */
8726 case LE: /* ZF=1 | SF<>OF */
8727 if (op1 == const0_rtx)
8731 /* strcmp pattern do (use flags) and combine may ask us for proper
8740 /* Return the fixed registers used for condition codes. */
8743 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8750 /* If two condition code modes are compatible, return a condition code
8751 mode which is compatible with both. Otherwise, return
8754 static enum machine_mode
8755 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8760 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8763 if ((m1 == CCGCmode && m2 == CCGOCmode)
8764 || (m1 == CCGOCmode && m2 == CCGCmode))
8792 /* These are only compatible with themselves, which we already
8798 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8801 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8803 enum rtx_code swapped_code = swap_condition (code);
8804 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8805 || (ix86_fp_comparison_cost (swapped_code)
8806 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8809 /* Swap, force into registers, or otherwise massage the two operands
8810 to a fp comparison. The operands are updated in place; the new
8811 comparison code is returned. */
8813 static enum rtx_code
8814 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8816 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8817 rtx op0 = *pop0, op1 = *pop1;
8818 enum machine_mode op_mode = GET_MODE (op0);
8819 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8821 /* All of the unordered compare instructions only work on registers.
8822 The same is true of the XFmode compare instructions. The same is
8823 true of the fcomi compare instructions. */
8826 && (fpcmp_mode == CCFPUmode
8827 || op_mode == XFmode
8828 || ix86_use_fcomi_compare (code)))
8830 op0 = force_reg (op_mode, op0);
8831 op1 = force_reg (op_mode, op1);
8835 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8836 things around if they appear profitable, otherwise force op0
8839 if (standard_80387_constant_p (op0) == 0
8840 || (GET_CODE (op0) == MEM
8841 && ! (standard_80387_constant_p (op1) == 0
8842 || GET_CODE (op1) == MEM)))
8845 tmp = op0, op0 = op1, op1 = tmp;
8846 code = swap_condition (code);
8849 if (GET_CODE (op0) != REG)
8850 op0 = force_reg (op_mode, op0);
8852 if (CONSTANT_P (op1))
8854 if (standard_80387_constant_p (op1))
8855 op1 = force_reg (op_mode, op1);
8857 op1 = validize_mem (force_const_mem (op_mode, op1));
8861 /* Try to rearrange the comparison to make it cheaper. */
8862 if (ix86_fp_comparison_cost (code)
8863 > ix86_fp_comparison_cost (swap_condition (code))
8864 && (GET_CODE (op1) == REG || !no_new_pseudos))
8867 tmp = op0, op0 = op1, op1 = tmp;
8868 code = swap_condition (code);
8869 if (GET_CODE (op0) != REG)
8870 op0 = force_reg (op_mode, op0);
8878 /* Convert comparison codes we use to represent FP comparison to integer
8879 code that will result in proper branch. Return UNKNOWN if no such code
8881 static enum rtx_code
8882 ix86_fp_compare_code_to_integer (enum rtx_code code)
8911 /* Split comparison code CODE into comparisons we can do using branch
8912 instructions. BYPASS_CODE is comparison code for branch that will
8913 branch around FIRST_CODE and SECOND_CODE. If some of branches
8914 is not required, set value to NIL.
8915 We never require more than two branches. */
8917 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8918 enum rtx_code *first_code,
8919 enum rtx_code *second_code)
8925 /* The fcomi comparison sets flags as follows:
8935 case GT: /* GTU - CF=0 & ZF=0 */
8936 case GE: /* GEU - CF=0 */
8937 case ORDERED: /* PF=0 */
8938 case UNORDERED: /* PF=1 */
8939 case UNEQ: /* EQ - ZF=1 */
8940 case UNLT: /* LTU - CF=1 */
8941 case UNLE: /* LEU - CF=1 | ZF=1 */
8942 case LTGT: /* EQ - ZF=0 */
8944 case LT: /* LTU - CF=1 - fails on unordered */
8946 *bypass_code = UNORDERED;
8948 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8950 *bypass_code = UNORDERED;
8952 case EQ: /* EQ - ZF=1 - fails on unordered */
8954 *bypass_code = UNORDERED;
8956 case NE: /* NE - ZF=0 - fails on unordered */
8958 *second_code = UNORDERED;
8960 case UNGE: /* GEU - CF=0 - fails on unordered */
8962 *second_code = UNORDERED;
8964 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8966 *second_code = UNORDERED;
8971 if (!TARGET_IEEE_FP)
8978 /* Return cost of comparison done fcom + arithmetics operations on AX.
8979 All following functions do use number of instructions as a cost metrics.
8980 In future this should be tweaked to compute bytes for optimize_size and
8981 take into account performance of various instructions on various CPUs. */
8983 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8985 if (!TARGET_IEEE_FP)
8987 /* The cost of code output by ix86_expand_fp_compare. */
9015 /* Return cost of comparison done using fcomi operation.
9016 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9018 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9020 enum rtx_code bypass_code, first_code, second_code;
9021 /* Return arbitrarily high cost when instruction is not supported - this
9022 prevents gcc from using it. */
9025 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9026 return (bypass_code != NIL || second_code != NIL) + 2;
9029 /* Return cost of comparison done using sahf operation.
9030 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9032 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9034 enum rtx_code bypass_code, first_code, second_code;
9035 /* Return arbitrarily high cost when instruction is not preferred - this
9036 avoids gcc from using it. */
9037 if (!TARGET_USE_SAHF && !optimize_size)
9039 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9040 return (bypass_code != NIL || second_code != NIL) + 3;
9043 /* Compute cost of the comparison done using any method.
9044 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9046 ix86_fp_comparison_cost (enum rtx_code code)
9048 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9051 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9052 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9054 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9055 if (min > sahf_cost)
9057 if (min > fcomi_cost)
9062 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9065 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9066 rtx *second_test, rtx *bypass_test)
9068 enum machine_mode fpcmp_mode, intcmp_mode;
9070 int cost = ix86_fp_comparison_cost (code);
9071 enum rtx_code bypass_code, first_code, second_code;
9073 fpcmp_mode = ix86_fp_compare_mode (code);
9074 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9077 *second_test = NULL_RTX;
9079 *bypass_test = NULL_RTX;
9081 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9083 /* Do fcomi/sahf based test when profitable. */
9084 if ((bypass_code == NIL || bypass_test)
9085 && (second_code == NIL || second_test)
9086 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9090 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9091 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9097 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9098 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9100 scratch = gen_reg_rtx (HImode);
9101 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9102 emit_insn (gen_x86_sahf_1 (scratch));
9105 /* The FP codes work out to act like unsigned. */
9106 intcmp_mode = fpcmp_mode;
9108 if (bypass_code != NIL)
9109 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9110 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9112 if (second_code != NIL)
9113 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9114 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9119 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9120 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9121 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9123 scratch = gen_reg_rtx (HImode);
9124 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9126 /* In the unordered case, we have to check C2 for NaN's, which
9127 doesn't happen to work out to anything nice combination-wise.
9128 So do some bit twiddling on the value we've got in AH to come
9129 up with an appropriate set of condition codes. */
9131 intcmp_mode = CCNOmode;
9136 if (code == GT || !TARGET_IEEE_FP)
9138 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9143 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9144 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9145 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9146 intcmp_mode = CCmode;
9152 if (code == LT && TARGET_IEEE_FP)
9154 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9155 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9156 intcmp_mode = CCmode;
9161 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9167 if (code == GE || !TARGET_IEEE_FP)
9169 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9174 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9175 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9182 if (code == LE && TARGET_IEEE_FP)
9184 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9185 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9186 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9187 intcmp_mode = CCmode;
9192 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9198 if (code == EQ && TARGET_IEEE_FP)
9200 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9201 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9202 intcmp_mode = CCmode;
9207 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9214 if (code == NE && TARGET_IEEE_FP)
9216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9217 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9229 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9233 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9242 /* Return the test that should be put into the flags user, i.e.
9243 the bcc, scc, or cmov instruction. */
9244 return gen_rtx_fmt_ee (code, VOIDmode,
9245 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9250 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9253 op0 = ix86_compare_op0;
9254 op1 = ix86_compare_op1;
9257 *second_test = NULL_RTX;
9259 *bypass_test = NULL_RTX;
9261 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9262 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9263 second_test, bypass_test);
9265 ret = ix86_expand_int_compare (code, op0, op1);
9270 /* Return true if the CODE will result in nontrivial jump sequence. */
9272 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9274 enum rtx_code bypass_code, first_code, second_code;
9277 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9278 return bypass_code != NIL || second_code != NIL;
9282 ix86_expand_branch (enum rtx_code code, rtx label)
9286 switch (GET_MODE (ix86_compare_op0))
9292 tmp = ix86_expand_compare (code, NULL, NULL);
9293 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9294 gen_rtx_LABEL_REF (VOIDmode, label),
9296 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9305 enum rtx_code bypass_code, first_code, second_code;
9307 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9310 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9312 /* Check whether we will use the natural sequence with one jump. If
9313 so, we can expand jump early. Otherwise delay expansion by
9314 creating compound insn to not confuse optimizers. */
9315 if (bypass_code == NIL && second_code == NIL
9318 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9319 gen_rtx_LABEL_REF (VOIDmode, label),
9324 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9325 ix86_compare_op0, ix86_compare_op1);
9326 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9327 gen_rtx_LABEL_REF (VOIDmode, label),
9329 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9331 use_fcomi = ix86_use_fcomi_compare (code);
9332 vec = rtvec_alloc (3 + !use_fcomi);
9333 RTVEC_ELT (vec, 0) = tmp;
9335 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9337 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9340 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9342 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9350 /* Expand DImode branch into multiple compare+branch. */
9352 rtx lo[2], hi[2], label2;
9353 enum rtx_code code1, code2, code3;
9355 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9357 tmp = ix86_compare_op0;
9358 ix86_compare_op0 = ix86_compare_op1;
9359 ix86_compare_op1 = tmp;
9360 code = swap_condition (code);
9362 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9363 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9365 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9366 avoid two branches. This costs one extra insn, so disable when
9367 optimizing for size. */
9369 if ((code == EQ || code == NE)
9371 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9376 if (hi[1] != const0_rtx)
9377 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9378 NULL_RTX, 0, OPTAB_WIDEN);
9381 if (lo[1] != const0_rtx)
9382 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9383 NULL_RTX, 0, OPTAB_WIDEN);
9385 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9386 NULL_RTX, 0, OPTAB_WIDEN);
9388 ix86_compare_op0 = tmp;
9389 ix86_compare_op1 = const0_rtx;
9390 ix86_expand_branch (code, label);
9394 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9395 op1 is a constant and the low word is zero, then we can just
9396 examine the high word. */
9398 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9401 case LT: case LTU: case GE: case GEU:
9402 ix86_compare_op0 = hi[0];
9403 ix86_compare_op1 = hi[1];
9404 ix86_expand_branch (code, label);
9410 /* Otherwise, we need two or three jumps. */
9412 label2 = gen_label_rtx ();
9415 code2 = swap_condition (code);
9416 code3 = unsigned_condition (code);
9420 case LT: case GT: case LTU: case GTU:
9423 case LE: code1 = LT; code2 = GT; break;
9424 case GE: code1 = GT; code2 = LT; break;
9425 case LEU: code1 = LTU; code2 = GTU; break;
9426 case GEU: code1 = GTU; code2 = LTU; break;
9428 case EQ: code1 = NIL; code2 = NE; break;
9429 case NE: code2 = NIL; break;
9437 * if (hi(a) < hi(b)) goto true;
9438 * if (hi(a) > hi(b)) goto false;
9439 * if (lo(a) < lo(b)) goto true;
9443 ix86_compare_op0 = hi[0];
9444 ix86_compare_op1 = hi[1];
9447 ix86_expand_branch (code1, label);
9449 ix86_expand_branch (code2, label2);
9451 ix86_compare_op0 = lo[0];
9452 ix86_compare_op1 = lo[1];
9453 ix86_expand_branch (code3, label);
9456 emit_label (label2);
9465 /* Split branch based on floating point condition. */
9467 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9468 rtx target1, rtx target2, rtx tmp)
9471 rtx label = NULL_RTX;
9473 int bypass_probability = -1, second_probability = -1, probability = -1;
9476 if (target2 != pc_rtx)
9479 code = reverse_condition_maybe_unordered (code);
9484 condition = ix86_expand_fp_compare (code, op1, op2,
9485 tmp, &second, &bypass);
9487 if (split_branch_probability >= 0)
9489 /* Distribute the probabilities across the jumps.
9490 Assume the BYPASS and SECOND to be always test
9492 probability = split_branch_probability;
9494 /* Value of 1 is low enough to make no need for probability
9495 to be updated. Later we may run some experiments and see
9496 if unordered values are more frequent in practice. */
9498 bypass_probability = 1;
9500 second_probability = 1;
9502 if (bypass != NULL_RTX)
9504 label = gen_label_rtx ();
9505 i = emit_jump_insn (gen_rtx_SET
9507 gen_rtx_IF_THEN_ELSE (VOIDmode,
9509 gen_rtx_LABEL_REF (VOIDmode,
9512 if (bypass_probability >= 0)
9514 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9515 GEN_INT (bypass_probability),
9518 i = emit_jump_insn (gen_rtx_SET
9520 gen_rtx_IF_THEN_ELSE (VOIDmode,
9521 condition, target1, target2)));
9522 if (probability >= 0)
9524 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9525 GEN_INT (probability),
9527 if (second != NULL_RTX)
9529 i = emit_jump_insn (gen_rtx_SET
9531 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9533 if (second_probability >= 0)
9535 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9536 GEN_INT (second_probability),
9539 if (label != NULL_RTX)
9544 ix86_expand_setcc (enum rtx_code code, rtx dest)
9546 rtx ret, tmp, tmpreg, equiv;
9547 rtx second_test, bypass_test;
9549 if (GET_MODE (ix86_compare_op0) == DImode
9551 return 0; /* FAIL */
9553 if (GET_MODE (dest) != QImode)
9556 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9557 PUT_MODE (ret, QImode);
9562 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9563 if (bypass_test || second_test)
9565 rtx test = second_test;
9567 rtx tmp2 = gen_reg_rtx (QImode);
9574 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9576 PUT_MODE (test, QImode);
9577 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9580 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9582 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9585 /* Attach a REG_EQUAL note describing the comparison result. */
9586 equiv = simplify_gen_relational (code, QImode,
9587 GET_MODE (ix86_compare_op0),
9588 ix86_compare_op0, ix86_compare_op1);
9589 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9591 return 1; /* DONE */
9594 /* Expand comparison setting or clearing carry flag. Return true when
9595 successful and set pop for the operation. */
9597 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9599 enum machine_mode mode =
9600 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9602 /* Do not handle DImode compares that go trought special path. Also we can't
9603 deal with FP compares yet. This is possible to add. */
9604 if ((mode == DImode && !TARGET_64BIT))
9606 if (FLOAT_MODE_P (mode))
9608 rtx second_test = NULL, bypass_test = NULL;
9609 rtx compare_op, compare_seq;
9611 /* Shortcut: following common codes never translate into carry flag compares. */
9612 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9613 || code == ORDERED || code == UNORDERED)
9616 /* These comparisons require zero flag; swap operands so they won't. */
9617 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9623 code = swap_condition (code);
9626 /* Try to expand the comparison and verify that we end up with carry flag
9627 based comparison. This is fails to be true only when we decide to expand
9628 comparison using arithmetic that is not too common scenario. */
9630 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9631 &second_test, &bypass_test);
9632 compare_seq = get_insns ();
9635 if (second_test || bypass_test)
9637 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9638 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9639 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9641 code = GET_CODE (compare_op);
9642 if (code != LTU && code != GEU)
9644 emit_insn (compare_seq);
9648 if (!INTEGRAL_MODE_P (mode))
9656 /* Convert a==0 into (unsigned)a<1. */
9659 if (op1 != const0_rtx)
9662 code = (code == EQ ? LTU : GEU);
9665 /* Convert a>b into b<a or a>=b-1. */
9668 if (GET_CODE (op1) == CONST_INT)
9670 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9671 /* Bail out on overflow. We still can swap operands but that
9672 would force loading of the constant into register. */
9673 if (op1 == const0_rtx
9674 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9676 code = (code == GTU ? GEU : LTU);
9683 code = (code == GTU ? LTU : GEU);
9687 /* Convert a>=0 into (unsigned)a<0x80000000. */
9690 if (mode == DImode || op1 != const0_rtx)
9692 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9693 code = (code == LT ? GEU : LTU);
9697 if (mode == DImode || op1 != constm1_rtx)
9699 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9700 code = (code == LE ? GEU : LTU);
9706 /* Swapping operands may cause constant to appear as first operand. */
9707 if (!nonimmediate_operand (op0, VOIDmode))
9711 op0 = force_reg (mode, op0);
9713 ix86_compare_op0 = op0;
9714 ix86_compare_op1 = op1;
9715 *pop = ix86_expand_compare (code, NULL, NULL);
9716 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9722 ix86_expand_int_movcc (rtx operands[])
9724 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9725 rtx compare_seq, compare_op;
9726 rtx second_test, bypass_test;
9727 enum machine_mode mode = GET_MODE (operands[0]);
9728 bool sign_bit_compare_p = false;;
9731 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9732 compare_seq = get_insns ();
9735 compare_code = GET_CODE (compare_op);
9737 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9738 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9739 sign_bit_compare_p = true;
9741 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9742 HImode insns, we'd be swallowed in word prefix ops. */
9744 if ((mode != HImode || TARGET_FAST_PREFIX)
9745 && (mode != DImode || TARGET_64BIT)
9746 && GET_CODE (operands[2]) == CONST_INT
9747 && GET_CODE (operands[3]) == CONST_INT)
9749 rtx out = operands[0];
9750 HOST_WIDE_INT ct = INTVAL (operands[2]);
9751 HOST_WIDE_INT cf = INTVAL (operands[3]);
9755 /* Sign bit compares are better done using shifts than we do by using
9757 if (sign_bit_compare_p
9758 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9759 ix86_compare_op1, &compare_op))
9761 /* Detect overlap between destination and compare sources. */
9764 if (!sign_bit_compare_p)
9768 compare_code = GET_CODE (compare_op);
9770 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9771 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9774 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9777 /* To simplify rest of code, restrict to the GEU case. */
9778 if (compare_code == LTU)
9780 HOST_WIDE_INT tmp = ct;
9783 compare_code = reverse_condition (compare_code);
9784 code = reverse_condition (code);
9789 PUT_CODE (compare_op,
9790 reverse_condition_maybe_unordered
9791 (GET_CODE (compare_op)));
9793 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9797 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9798 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9799 tmp = gen_reg_rtx (mode);
9802 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9804 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9808 if (code == GT || code == GE)
9809 code = reverse_condition (code);
9812 HOST_WIDE_INT tmp = ct;
9817 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9818 ix86_compare_op1, VOIDmode, 0, -1);
9831 tmp = expand_simple_binop (mode, PLUS,
9833 copy_rtx (tmp), 1, OPTAB_DIRECT);
9844 tmp = expand_simple_binop (mode, IOR,
9846 copy_rtx (tmp), 1, OPTAB_DIRECT);
9848 else if (diff == -1 && ct)
9858 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9860 tmp = expand_simple_binop (mode, PLUS,
9861 copy_rtx (tmp), GEN_INT (cf),
9862 copy_rtx (tmp), 1, OPTAB_DIRECT);
9870 * andl cf - ct, dest
9880 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9883 tmp = expand_simple_binop (mode, AND,
9885 gen_int_mode (cf - ct, mode),
9886 copy_rtx (tmp), 1, OPTAB_DIRECT);
9888 tmp = expand_simple_binop (mode, PLUS,
9889 copy_rtx (tmp), GEN_INT (ct),
9890 copy_rtx (tmp), 1, OPTAB_DIRECT);
9893 if (!rtx_equal_p (tmp, out))
9894 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9896 return 1; /* DONE */
9902 tmp = ct, ct = cf, cf = tmp;
9904 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9906 /* We may be reversing unordered compare to normal compare, that
9907 is not valid in general (we may convert non-trapping condition
9908 to trapping one), however on i386 we currently emit all
9909 comparisons unordered. */
9910 compare_code = reverse_condition_maybe_unordered (compare_code);
9911 code = reverse_condition_maybe_unordered (code);
9915 compare_code = reverse_condition (compare_code);
9916 code = reverse_condition (code);
9921 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9922 && GET_CODE (ix86_compare_op1) == CONST_INT)
9924 if (ix86_compare_op1 == const0_rtx
9925 && (code == LT || code == GE))
9926 compare_code = code;
9927 else if (ix86_compare_op1 == constm1_rtx)
9931 else if (code == GT)
9936 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9937 if (compare_code != NIL
9938 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9939 && (cf == -1 || ct == -1))
9941 /* If lea code below could be used, only optimize
9942 if it results in a 2 insn sequence. */
9944 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9945 || diff == 3 || diff == 5 || diff == 9)
9946 || (compare_code == LT && ct == -1)
9947 || (compare_code == GE && cf == -1))
9950 * notl op1 (if necessary)
9958 code = reverse_condition (code);
9961 out = emit_store_flag (out, code, ix86_compare_op0,
9962 ix86_compare_op1, VOIDmode, 0, -1);
9964 out = expand_simple_binop (mode, IOR,
9966 out, 1, OPTAB_DIRECT);
9967 if (out != operands[0])
9968 emit_move_insn (operands[0], out);
9970 return 1; /* DONE */
9975 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9976 || diff == 3 || diff == 5 || diff == 9)
9977 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9978 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9984 * lea cf(dest*(ct-cf)),dest
9988 * This also catches the degenerate setcc-only case.
9994 out = emit_store_flag (out, code, ix86_compare_op0,
9995 ix86_compare_op1, VOIDmode, 0, 1);
9998 /* On x86_64 the lea instruction operates on Pmode, so we need
9999 to get arithmetics done in proper mode to match. */
10001 tmp = copy_rtx (out);
10005 out1 = copy_rtx (out);
10006 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10010 tmp = gen_rtx_PLUS (mode, tmp, out1);
10016 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10019 if (!rtx_equal_p (tmp, out))
10022 out = force_operand (tmp, copy_rtx (out));
10024 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10026 if (!rtx_equal_p (out, operands[0]))
10027 emit_move_insn (operands[0], copy_rtx (out));
10029 return 1; /* DONE */
10033 * General case: Jumpful:
10034 * xorl dest,dest cmpl op1, op2
10035 * cmpl op1, op2 movl ct, dest
10036 * setcc dest jcc 1f
10037 * decl dest movl cf, dest
10038 * andl (cf-ct),dest 1:
10041 * Size 20. Size 14.
10043 * This is reasonably steep, but branch mispredict costs are
10044 * high on modern cpus, so consider failing only if optimizing
10048 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10049 && BRANCH_COST >= 2)
10055 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10056 /* We may be reversing unordered compare to normal compare,
10057 that is not valid in general (we may convert non-trapping
10058 condition to trapping one), however on i386 we currently
10059 emit all comparisons unordered. */
10060 code = reverse_condition_maybe_unordered (code);
10063 code = reverse_condition (code);
10064 if (compare_code != NIL)
10065 compare_code = reverse_condition (compare_code);
10069 if (compare_code != NIL)
10071 /* notl op1 (if needed)
10076 For x < 0 (resp. x <= -1) there will be no notl,
10077 so if possible swap the constants to get rid of the
10079 True/false will be -1/0 while code below (store flag
10080 followed by decrement) is 0/-1, so the constants need
10081 to be exchanged once more. */
10083 if (compare_code == GE || !cf)
10085 code = reverse_condition (code);
10090 HOST_WIDE_INT tmp = cf;
10095 out = emit_store_flag (out, code, ix86_compare_op0,
10096 ix86_compare_op1, VOIDmode, 0, -1);
10100 out = emit_store_flag (out, code, ix86_compare_op0,
10101 ix86_compare_op1, VOIDmode, 0, 1);
10103 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10104 copy_rtx (out), 1, OPTAB_DIRECT);
10107 out = expand_simple_binop (mode, AND, copy_rtx (out),
10108 gen_int_mode (cf - ct, mode),
10109 copy_rtx (out), 1, OPTAB_DIRECT);
10111 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10112 copy_rtx (out), 1, OPTAB_DIRECT);
10113 if (!rtx_equal_p (out, operands[0]))
10114 emit_move_insn (operands[0], copy_rtx (out));
10116 return 1; /* DONE */
10120 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10122 /* Try a few things more with specific constants and a variable. */
10125 rtx var, orig_out, out, tmp;
10127 if (BRANCH_COST <= 2)
10128 return 0; /* FAIL */
10130 /* If one of the two operands is an interesting constant, load a
10131 constant with the above and mask it in with a logical operation. */
10133 if (GET_CODE (operands[2]) == CONST_INT)
10136 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10137 operands[3] = constm1_rtx, op = and_optab;
10138 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10139 operands[3] = const0_rtx, op = ior_optab;
10141 return 0; /* FAIL */
10143 else if (GET_CODE (operands[3]) == CONST_INT)
10146 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10147 operands[2] = constm1_rtx, op = and_optab;
10148 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10149 operands[2] = const0_rtx, op = ior_optab;
10151 return 0; /* FAIL */
10154 return 0; /* FAIL */
10156 orig_out = operands[0];
10157 tmp = gen_reg_rtx (mode);
10160 /* Recurse to get the constant loaded. */
10161 if (ix86_expand_int_movcc (operands) == 0)
10162 return 0; /* FAIL */
10164 /* Mask in the interesting variable. */
10165 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10167 if (!rtx_equal_p (out, orig_out))
10168 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10170 return 1; /* DONE */
10174 * For comparison with above,
10184 if (! nonimmediate_operand (operands[2], mode))
10185 operands[2] = force_reg (mode, operands[2]);
10186 if (! nonimmediate_operand (operands[3], mode))
10187 operands[3] = force_reg (mode, operands[3]);
10189 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10191 rtx tmp = gen_reg_rtx (mode);
10192 emit_move_insn (tmp, operands[3]);
10195 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10197 rtx tmp = gen_reg_rtx (mode);
10198 emit_move_insn (tmp, operands[2]);
10202 if (! register_operand (operands[2], VOIDmode)
10204 || ! register_operand (operands[3], VOIDmode)))
10205 operands[2] = force_reg (mode, operands[2]);
10208 && ! register_operand (operands[3], VOIDmode))
10209 operands[3] = force_reg (mode, operands[3]);
10211 emit_insn (compare_seq);
10212 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10213 gen_rtx_IF_THEN_ELSE (mode,
10214 compare_op, operands[2],
10217 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10218 gen_rtx_IF_THEN_ELSE (mode,
10220 copy_rtx (operands[3]),
10221 copy_rtx (operands[0]))));
10223 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10224 gen_rtx_IF_THEN_ELSE (mode,
10226 copy_rtx (operands[2]),
10227 copy_rtx (operands[0]))));
10229 return 1; /* DONE */
10233 ix86_expand_fp_movcc (rtx operands[])
10235 enum rtx_code code;
10237 rtx compare_op, second_test, bypass_test;
10239 /* For SF/DFmode conditional moves based on comparisons
10240 in same mode, we may want to use SSE min/max instructions. */
10241 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10242 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10243 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10244 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10245 && (!TARGET_IEEE_FP
10246 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10247 /* We may be called from the post-reload splitter. */
10248 && (!REG_P (operands[0])
10249 || SSE_REG_P (operands[0])
10250 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10252 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10253 code = GET_CODE (operands[1]);
10255 /* See if we have (cross) match between comparison operands and
10256 conditional move operands. */
10257 if (rtx_equal_p (operands[2], op1))
10262 code = reverse_condition_maybe_unordered (code);
10264 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10266 /* Check for min operation. */
10267 if (code == LT || code == UNLE)
10275 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10276 if (memory_operand (op0, VOIDmode))
10277 op0 = force_reg (GET_MODE (operands[0]), op0);
10278 if (GET_MODE (operands[0]) == SFmode)
10279 emit_insn (gen_minsf3 (operands[0], op0, op1));
10281 emit_insn (gen_mindf3 (operands[0], op0, op1));
10284 /* Check for max operation. */
10285 if (code == GT || code == UNGE)
10293 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10294 if (memory_operand (op0, VOIDmode))
10295 op0 = force_reg (GET_MODE (operands[0]), op0);
10296 if (GET_MODE (operands[0]) == SFmode)
10297 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10299 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10303 /* Manage condition to be sse_comparison_operator. In case we are
10304 in non-ieee mode, try to canonicalize the destination operand
10305 to be first in the comparison - this helps reload to avoid extra
10307 if (!sse_comparison_operator (operands[1], VOIDmode)
10308 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10310 rtx tmp = ix86_compare_op0;
10311 ix86_compare_op0 = ix86_compare_op1;
10312 ix86_compare_op1 = tmp;
10313 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10314 VOIDmode, ix86_compare_op0,
10317 /* Similarly try to manage result to be first operand of conditional
10318 move. We also don't support the NE comparison on SSE, so try to
10320 if ((rtx_equal_p (operands[0], operands[3])
10321 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10322 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10324 rtx tmp = operands[2];
10325 operands[2] = operands[3];
10327 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10328 (GET_CODE (operands[1])),
10329 VOIDmode, ix86_compare_op0,
10332 if (GET_MODE (operands[0]) == SFmode)
10333 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10334 operands[2], operands[3],
10335 ix86_compare_op0, ix86_compare_op1));
10337 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10338 operands[2], operands[3],
10339 ix86_compare_op0, ix86_compare_op1));
10343 /* The floating point conditional move instructions don't directly
10344 support conditions resulting from a signed integer comparison. */
10346 code = GET_CODE (operands[1]);
10347 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10349 /* The floating point conditional move instructions don't directly
10350 support signed integer comparisons. */
10352 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10354 if (second_test != NULL || bypass_test != NULL)
10356 tmp = gen_reg_rtx (QImode);
10357 ix86_expand_setcc (code, tmp);
10359 ix86_compare_op0 = tmp;
10360 ix86_compare_op1 = const0_rtx;
10361 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10363 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10365 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10366 emit_move_insn (tmp, operands[3]);
10369 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10371 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10372 emit_move_insn (tmp, operands[2]);
10376 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10377 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10382 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10383 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10388 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10389 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10397 /* Expand conditional increment or decrement using adb/sbb instructions.
10398 The default case using setcc followed by the conditional move can be
10399 done by generic code. */
10401 ix86_expand_int_addcc (rtx operands[])
10403 enum rtx_code code = GET_CODE (operands[1]);
10405 rtx val = const0_rtx;
10406 bool fpcmp = false;
10407 enum machine_mode mode = GET_MODE (operands[0]);
10409 if (operands[3] != const1_rtx
10410 && operands[3] != constm1_rtx)
10412 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10413 ix86_compare_op1, &compare_op))
10415 code = GET_CODE (compare_op);
10417 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10418 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10421 code = ix86_fp_compare_code_to_integer (code);
10428 PUT_CODE (compare_op,
10429 reverse_condition_maybe_unordered
10430 (GET_CODE (compare_op)));
10432 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10434 PUT_MODE (compare_op, mode);
10436 /* Construct either adc or sbb insn. */
10437 if ((code == LTU) == (operands[3] == constm1_rtx))
10439 switch (GET_MODE (operands[0]))
10442 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10445 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10448 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10451 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10459 switch (GET_MODE (operands[0]))
10462 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10465 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10468 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10471 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10477 return 1; /* DONE */
10481 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10482 works for floating pointer parameters and nonoffsetable memories.
10483 For pushes, it returns just stack offsets; the values will be saved
10484 in the right order. Maximally three parts are generated. */
10487 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10492 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10494 size = (GET_MODE_SIZE (mode) + 4) / 8;
10496 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10498 if (size < 2 || size > 3)
10501 /* Optimize constant pool reference to immediates. This is used by fp
10502 moves, that force all constants to memory to allow combining. */
10503 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10505 rtx tmp = maybe_get_pool_constant (operand);
10510 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10512 /* The only non-offsetable memories we handle are pushes. */
10513 if (! push_operand (operand, VOIDmode))
10516 operand = copy_rtx (operand);
10517 PUT_MODE (operand, Pmode);
10518 parts[0] = parts[1] = parts[2] = operand;
10520 else if (!TARGET_64BIT)
10522 if (mode == DImode)
10523 split_di (&operand, 1, &parts[0], &parts[1]);
10526 if (REG_P (operand))
10528 if (!reload_completed)
10530 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10531 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10533 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10535 else if (offsettable_memref_p (operand))
10537 operand = adjust_address (operand, SImode, 0);
10538 parts[0] = operand;
10539 parts[1] = adjust_address (operand, SImode, 4);
10541 parts[2] = adjust_address (operand, SImode, 8);
10543 else if (GET_CODE (operand) == CONST_DOUBLE)
10548 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10552 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10553 parts[2] = gen_int_mode (l[2], SImode);
10556 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10561 parts[1] = gen_int_mode (l[1], SImode);
10562 parts[0] = gen_int_mode (l[0], SImode);
10570 if (mode == TImode)
10571 split_ti (&operand, 1, &parts[0], &parts[1]);
10572 if (mode == XFmode || mode == TFmode)
10574 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10575 if (REG_P (operand))
10577 if (!reload_completed)
10579 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10580 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10582 else if (offsettable_memref_p (operand))
10584 operand = adjust_address (operand, DImode, 0);
10585 parts[0] = operand;
10586 parts[1] = adjust_address (operand, upper_mode, 8);
10588 else if (GET_CODE (operand) == CONST_DOUBLE)
10593 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10594 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10595 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10596 if (HOST_BITS_PER_WIDE_INT >= 64)
10599 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10600 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10603 parts[0] = immed_double_const (l[0], l[1], DImode);
10604 if (upper_mode == SImode)
10605 parts[1] = gen_int_mode (l[2], SImode);
10606 else if (HOST_BITS_PER_WIDE_INT >= 64)
10609 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10610 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10613 parts[1] = immed_double_const (l[2], l[3], DImode);
10623 /* Emit insns to perform a move or push of DI, DF, and XF values.
10624 Return false when normal moves are needed; true when all required
10625 insns have been emitted. Operands 2-4 contain the input values
10626 int the correct order; operands 5-7 contain the output values. */
10629 ix86_split_long_move (rtx operands[])
10634 int collisions = 0;
10635 enum machine_mode mode = GET_MODE (operands[0]);
10637 /* The DFmode expanders may ask us to move double.
10638 For 64bit target this is single move. By hiding the fact
10639 here we simplify i386.md splitters. */
10640 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10642 /* Optimize constant pool reference to immediates. This is used by
10643 fp moves, that force all constants to memory to allow combining. */
10645 if (GET_CODE (operands[1]) == MEM
10646 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10647 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10648 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10649 if (push_operand (operands[0], VOIDmode))
10651 operands[0] = copy_rtx (operands[0]);
10652 PUT_MODE (operands[0], Pmode);
10655 operands[0] = gen_lowpart (DImode, operands[0]);
10656 operands[1] = gen_lowpart (DImode, operands[1]);
10657 emit_move_insn (operands[0], operands[1]);
10661 /* The only non-offsettable memory we handle is push. */
10662 if (push_operand (operands[0], VOIDmode))
10664 else if (GET_CODE (operands[0]) == MEM
10665 && ! offsettable_memref_p (operands[0]))
10668 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10669 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10671 /* When emitting push, take care for source operands on the stack. */
10672 if (push && GET_CODE (operands[1]) == MEM
10673 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10676 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10677 XEXP (part[1][2], 0));
10678 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10679 XEXP (part[1][1], 0));
10682 /* We need to do copy in the right order in case an address register
10683 of the source overlaps the destination. */
10684 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10686 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10688 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10691 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10694 /* Collision in the middle part can be handled by reordering. */
10695 if (collisions == 1 && nparts == 3
10696 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10699 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10700 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10703 /* If there are more collisions, we can't handle it by reordering.
10704 Do an lea to the last part and use only one colliding move. */
10705 else if (collisions > 1)
10711 base = part[0][nparts - 1];
10713 /* Handle the case when the last part isn't valid for lea.
10714 Happens in 64-bit mode storing the 12-byte XFmode. */
10715 if (GET_MODE (base) != Pmode)
10716 base = gen_rtx_REG (Pmode, REGNO (base));
10718 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10719 part[1][0] = replace_equiv_address (part[1][0], base);
10720 part[1][1] = replace_equiv_address (part[1][1],
10721 plus_constant (base, UNITS_PER_WORD));
10723 part[1][2] = replace_equiv_address (part[1][2],
10724 plus_constant (base, 8));
10734 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10735 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10736 emit_move_insn (part[0][2], part[1][2]);
10741 /* In 64bit mode we don't have 32bit push available. In case this is
10742 register, it is OK - we will just use larger counterpart. We also
10743 retype memory - these comes from attempt to avoid REX prefix on
10744 moving of second half of TFmode value. */
10745 if (GET_MODE (part[1][1]) == SImode)
10747 if (GET_CODE (part[1][1]) == MEM)
10748 part[1][1] = adjust_address (part[1][1], DImode, 0);
10749 else if (REG_P (part[1][1]))
10750 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10753 if (GET_MODE (part[1][0]) == SImode)
10754 part[1][0] = part[1][1];
10757 emit_move_insn (part[0][1], part[1][1]);
10758 emit_move_insn (part[0][0], part[1][0]);
10762 /* Choose correct order to not overwrite the source before it is copied. */
10763 if ((REG_P (part[0][0])
10764 && REG_P (part[1][1])
10765 && (REGNO (part[0][0]) == REGNO (part[1][1])
10767 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10769 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10773 operands[2] = part[0][2];
10774 operands[3] = part[0][1];
10775 operands[4] = part[0][0];
10776 operands[5] = part[1][2];
10777 operands[6] = part[1][1];
10778 operands[7] = part[1][0];
10782 operands[2] = part[0][1];
10783 operands[3] = part[0][0];
10784 operands[5] = part[1][1];
10785 operands[6] = part[1][0];
10792 operands[2] = part[0][0];
10793 operands[3] = part[0][1];
10794 operands[4] = part[0][2];
10795 operands[5] = part[1][0];
10796 operands[6] = part[1][1];
10797 operands[7] = part[1][2];
10801 operands[2] = part[0][0];
10802 operands[3] = part[0][1];
10803 operands[5] = part[1][0];
10804 operands[6] = part[1][1];
10807 emit_move_insn (operands[2], operands[5]);
10808 emit_move_insn (operands[3], operands[6]);
10810 emit_move_insn (operands[4], operands[7]);
10816 ix86_split_ashldi (rtx *operands, rtx scratch)
10818 rtx low[2], high[2];
10821 if (GET_CODE (operands[2]) == CONST_INT)
10823 split_di (operands, 2, low, high);
10824 count = INTVAL (operands[2]) & 63;
10828 emit_move_insn (high[0], low[1]);
10829 emit_move_insn (low[0], const0_rtx);
10832 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10836 if (!rtx_equal_p (operands[0], operands[1]))
10837 emit_move_insn (operands[0], operands[1]);
10838 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10839 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10844 if (!rtx_equal_p (operands[0], operands[1]))
10845 emit_move_insn (operands[0], operands[1]);
10847 split_di (operands, 1, low, high);
10849 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10850 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10852 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10854 if (! no_new_pseudos)
10855 scratch = force_reg (SImode, const0_rtx);
10857 emit_move_insn (scratch, const0_rtx);
10859 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10863 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10868 ix86_split_ashrdi (rtx *operands, rtx scratch)
10870 rtx low[2], high[2];
10873 if (GET_CODE (operands[2]) == CONST_INT)
10875 split_di (operands, 2, low, high);
10876 count = INTVAL (operands[2]) & 63;
10880 emit_move_insn (low[0], high[1]);
10882 if (! reload_completed)
10883 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10886 emit_move_insn (high[0], low[0]);
10887 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10891 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10895 if (!rtx_equal_p (operands[0], operands[1]))
10896 emit_move_insn (operands[0], operands[1]);
10897 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10898 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10903 if (!rtx_equal_p (operands[0], operands[1]))
10904 emit_move_insn (operands[0], operands[1]);
10906 split_di (operands, 1, low, high);
10908 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10909 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10911 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10913 if (! no_new_pseudos)
10914 scratch = gen_reg_rtx (SImode);
10915 emit_move_insn (scratch, high[0]);
10916 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10917 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10921 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10926 ix86_split_lshrdi (rtx *operands, rtx scratch)
10928 rtx low[2], high[2];
10931 if (GET_CODE (operands[2]) == CONST_INT)
10933 split_di (operands, 2, low, high);
10934 count = INTVAL (operands[2]) & 63;
10938 emit_move_insn (low[0], high[1]);
10939 emit_move_insn (high[0], const0_rtx);
10942 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10946 if (!rtx_equal_p (operands[0], operands[1]))
10947 emit_move_insn (operands[0], operands[1]);
10948 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10949 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10954 if (!rtx_equal_p (operands[0], operands[1]))
10955 emit_move_insn (operands[0], operands[1]);
10957 split_di (operands, 1, low, high);
10959 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10960 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10962 /* Heh. By reversing the arguments, we can reuse this pattern. */
10963 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10965 if (! no_new_pseudos)
10966 scratch = force_reg (SImode, const0_rtx);
10968 emit_move_insn (scratch, const0_rtx);
10970 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10974 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10978 /* Helper function for the string operations below. Dest VARIABLE whether
10979 it is aligned to VALUE bytes. If true, jump to the label. */
10981 ix86_expand_aligntest (rtx variable, int value)
10983 rtx label = gen_label_rtx ();
10984 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10985 if (GET_MODE (variable) == DImode)
10986 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10988 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10989 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10994 /* Adjust COUNTER by the VALUE. */
10996 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10998 if (GET_MODE (countreg) == DImode)
10999 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11001 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11004 /* Zero extend possibly SImode EXP to Pmode register. */
11006 ix86_zero_extend_to_Pmode (rtx exp)
11009 if (GET_MODE (exp) == VOIDmode)
11010 return force_reg (Pmode, exp);
11011 if (GET_MODE (exp) == Pmode)
11012 return copy_to_mode_reg (Pmode, exp);
11013 r = gen_reg_rtx (Pmode);
11014 emit_insn (gen_zero_extendsidi2 (r, exp));
11018 /* Expand string move (memcpy) operation. Use i386 string operations when
11019 profitable. expand_clrstr contains similar code. */
11021 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11023 rtx srcreg, destreg, countreg;
11024 enum machine_mode counter_mode;
11025 HOST_WIDE_INT align = 0;
11026 unsigned HOST_WIDE_INT count = 0;
11029 if (GET_CODE (align_exp) == CONST_INT)
11030 align = INTVAL (align_exp);
11032 /* Can't use any of this if the user has appropriated esi or edi. */
11033 if (global_regs[4] || global_regs[5])
11036 /* This simple hack avoids all inlining code and simplifies code below. */
11037 if (!TARGET_ALIGN_STRINGOPS)
11040 if (GET_CODE (count_exp) == CONST_INT)
11042 count = INTVAL (count_exp);
11043 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11047 /* Figure out proper mode for counter. For 32bits it is always SImode,
11048 for 64bits use SImode when possible, otherwise DImode.
11049 Set count to number of bytes copied when known at compile time. */
11050 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11051 || x86_64_zero_extended_value (count_exp))
11052 counter_mode = SImode;
11054 counter_mode = DImode;
11058 if (counter_mode != SImode && counter_mode != DImode)
11061 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11062 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11064 emit_insn (gen_cld ());
11066 /* When optimizing for size emit simple rep ; movsb instruction for
11067 counts not divisible by 4. */
11069 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11071 countreg = ix86_zero_extend_to_Pmode (count_exp);
11073 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11074 destreg, srcreg, countreg));
11076 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11077 destreg, srcreg, countreg));
11080 /* For constant aligned (or small unaligned) copies use rep movsl
11081 followed by code copying the rest. For PentiumPro ensure 8 byte
11082 alignment to allow rep movsl acceleration. */
11084 else if (count != 0
11086 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11087 || optimize_size || count < (unsigned int) 64))
11089 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11090 if (count & ~(size - 1))
11092 countreg = copy_to_mode_reg (counter_mode,
11093 GEN_INT ((count >> (size == 4 ? 2 : 3))
11094 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11095 countreg = ix86_zero_extend_to_Pmode (countreg);
11099 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11100 destreg, srcreg, countreg));
11102 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11103 destreg, srcreg, countreg));
11106 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11107 destreg, srcreg, countreg));
11109 if (size == 8 && (count & 0x04))
11110 emit_insn (gen_strmovsi (destreg, srcreg));
11112 emit_insn (gen_strmovhi (destreg, srcreg));
11114 emit_insn (gen_strmovqi (destreg, srcreg));
11116 /* The generic code based on the glibc implementation:
11117 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11118 allowing accelerated copying there)
11119 - copy the data using rep movsl
11120 - copy the rest. */
11125 int desired_alignment = (TARGET_PENTIUMPRO
11126 && (count == 0 || count >= (unsigned int) 260)
11127 ? 8 : UNITS_PER_WORD);
11129 /* In case we don't know anything about the alignment, default to
11130 library version, since it is usually equally fast and result in
11133 Also emit call when we know that the count is large and call overhead
11134 will not be important. */
11135 if (!TARGET_INLINE_ALL_STRINGOPS
11136 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11142 if (TARGET_SINGLE_STRINGOP)
11143 emit_insn (gen_cld ());
11145 countreg2 = gen_reg_rtx (Pmode);
11146 countreg = copy_to_mode_reg (counter_mode, count_exp);
11148 /* We don't use loops to align destination and to copy parts smaller
11149 than 4 bytes, because gcc is able to optimize such code better (in
11150 the case the destination or the count really is aligned, gcc is often
11151 able to predict the branches) and also it is friendlier to the
11152 hardware branch prediction.
11154 Using loops is beneficial for generic case, because we can
11155 handle small counts using the loops. Many CPUs (such as Athlon)
11156 have large REP prefix setup costs.
11158 This is quite costly. Maybe we can revisit this decision later or
11159 add some customizability to this code. */
11161 if (count == 0 && align < desired_alignment)
11163 label = gen_label_rtx ();
11164 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11165 LEU, 0, counter_mode, 1, label);
11169 rtx label = ix86_expand_aligntest (destreg, 1);
11170 emit_insn (gen_strmovqi (destreg, srcreg));
11171 ix86_adjust_counter (countreg, 1);
11172 emit_label (label);
11173 LABEL_NUSES (label) = 1;
11177 rtx label = ix86_expand_aligntest (destreg, 2);
11178 emit_insn (gen_strmovhi (destreg, srcreg));
11179 ix86_adjust_counter (countreg, 2);
11180 emit_label (label);
11181 LABEL_NUSES (label) = 1;
11183 if (align <= 4 && desired_alignment > 4)
11185 rtx label = ix86_expand_aligntest (destreg, 4);
11186 emit_insn (gen_strmovsi (destreg, srcreg));
11187 ix86_adjust_counter (countreg, 4);
11188 emit_label (label);
11189 LABEL_NUSES (label) = 1;
11192 if (label && desired_alignment > 4 && !TARGET_64BIT)
11194 emit_label (label);
11195 LABEL_NUSES (label) = 1;
11198 if (!TARGET_SINGLE_STRINGOP)
11199 emit_insn (gen_cld ());
11202 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11204 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11205 destreg, srcreg, countreg2));
11209 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11210 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11211 destreg, srcreg, countreg2));
11216 emit_label (label);
11217 LABEL_NUSES (label) = 1;
11219 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11220 emit_insn (gen_strmovsi (destreg, srcreg));
11221 if ((align <= 4 || count == 0) && TARGET_64BIT)
11223 rtx label = ix86_expand_aligntest (countreg, 4);
11224 emit_insn (gen_strmovsi (destreg, srcreg));
11225 emit_label (label);
11226 LABEL_NUSES (label) = 1;
11228 if (align > 2 && count != 0 && (count & 2))
11229 emit_insn (gen_strmovhi (destreg, srcreg));
11230 if (align <= 2 || count == 0)
11232 rtx label = ix86_expand_aligntest (countreg, 2);
11233 emit_insn (gen_strmovhi (destreg, srcreg));
11234 emit_label (label);
11235 LABEL_NUSES (label) = 1;
11237 if (align > 1 && count != 0 && (count & 1))
11238 emit_insn (gen_strmovqi (destreg, srcreg));
11239 if (align <= 1 || count == 0)
11241 rtx label = ix86_expand_aligntest (countreg, 1);
11242 emit_insn (gen_strmovqi (destreg, srcreg));
11243 emit_label (label);
11244 LABEL_NUSES (label) = 1;
11248 insns = get_insns ();
11251 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11256 /* Expand string clear operation (bzero). Use i386 string operations when
11257 profitable. expand_movstr contains similar code. */
11259 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
11261 rtx destreg, zeroreg, countreg;
11262 enum machine_mode counter_mode;
11263 HOST_WIDE_INT align = 0;
11264 unsigned HOST_WIDE_INT count = 0;
11266 if (GET_CODE (align_exp) == CONST_INT)
11267 align = INTVAL (align_exp);
11269 /* Can't use any of this if the user has appropriated esi. */
11270 if (global_regs[4])
11273 /* This simple hack avoids all inlining code and simplifies code below. */
11274 if (!TARGET_ALIGN_STRINGOPS)
11277 if (GET_CODE (count_exp) == CONST_INT)
11279 count = INTVAL (count_exp);
11280 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11283 /* Figure out proper mode for counter. For 32bits it is always SImode,
11284 for 64bits use SImode when possible, otherwise DImode.
11285 Set count to number of bytes copied when known at compile time. */
11286 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11287 || x86_64_zero_extended_value (count_exp))
11288 counter_mode = SImode;
11290 counter_mode = DImode;
11292 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11294 emit_insn (gen_cld ());
11296 /* When optimizing for size emit simple rep ; movsb instruction for
11297 counts not divisible by 4. */
11299 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11301 countreg = ix86_zero_extend_to_Pmode (count_exp);
11302 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11304 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11305 destreg, countreg));
11307 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11308 destreg, countreg));
11310 else if (count != 0
11312 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11313 || optimize_size || count < (unsigned int) 64))
11315 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11316 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11317 if (count & ~(size - 1))
11319 countreg = copy_to_mode_reg (counter_mode,
11320 GEN_INT ((count >> (size == 4 ? 2 : 3))
11321 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11322 countreg = ix86_zero_extend_to_Pmode (countreg);
11326 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11327 destreg, countreg));
11329 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11330 destreg, countreg));
11333 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11334 destreg, countreg));
11336 if (size == 8 && (count & 0x04))
11337 emit_insn (gen_strsetsi (destreg,
11338 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11340 emit_insn (gen_strsethi (destreg,
11341 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11343 emit_insn (gen_strsetqi (destreg,
11344 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11350 /* Compute desired alignment of the string operation. */
11351 int desired_alignment = (TARGET_PENTIUMPRO
11352 && (count == 0 || count >= (unsigned int) 260)
11353 ? 8 : UNITS_PER_WORD);
11355 /* In case we don't know anything about the alignment, default to
11356 library version, since it is usually equally fast and result in
11359 Also emit call when we know that the count is large and call overhead
11360 will not be important. */
11361 if (!TARGET_INLINE_ALL_STRINGOPS
11362 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11365 if (TARGET_SINGLE_STRINGOP)
11366 emit_insn (gen_cld ());
11368 countreg2 = gen_reg_rtx (Pmode);
11369 countreg = copy_to_mode_reg (counter_mode, count_exp);
11370 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11372 if (count == 0 && align < desired_alignment)
11374 label = gen_label_rtx ();
11375 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11376 LEU, 0, counter_mode, 1, label);
11380 rtx label = ix86_expand_aligntest (destreg, 1);
11381 emit_insn (gen_strsetqi (destreg,
11382 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11383 ix86_adjust_counter (countreg, 1);
11384 emit_label (label);
11385 LABEL_NUSES (label) = 1;
11389 rtx label = ix86_expand_aligntest (destreg, 2);
11390 emit_insn (gen_strsethi (destreg,
11391 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11392 ix86_adjust_counter (countreg, 2);
11393 emit_label (label);
11394 LABEL_NUSES (label) = 1;
11396 if (align <= 4 && desired_alignment > 4)
11398 rtx label = ix86_expand_aligntest (destreg, 4);
11399 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11400 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11402 ix86_adjust_counter (countreg, 4);
11403 emit_label (label);
11404 LABEL_NUSES (label) = 1;
11407 if (label && desired_alignment > 4 && !TARGET_64BIT)
11409 emit_label (label);
11410 LABEL_NUSES (label) = 1;
11414 if (!TARGET_SINGLE_STRINGOP)
11415 emit_insn (gen_cld ());
11418 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11420 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11421 destreg, countreg2));
11425 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11426 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11427 destreg, countreg2));
11431 emit_label (label);
11432 LABEL_NUSES (label) = 1;
11435 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11436 emit_insn (gen_strsetsi (destreg,
11437 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11438 if (TARGET_64BIT && (align <= 4 || count == 0))
11440 rtx label = ix86_expand_aligntest (countreg, 4);
11441 emit_insn (gen_strsetsi (destreg,
11442 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11443 emit_label (label);
11444 LABEL_NUSES (label) = 1;
11446 if (align > 2 && count != 0 && (count & 2))
11447 emit_insn (gen_strsethi (destreg,
11448 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11449 if (align <= 2 || count == 0)
11451 rtx label = ix86_expand_aligntest (countreg, 2);
11452 emit_insn (gen_strsethi (destreg,
11453 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11454 emit_label (label);
11455 LABEL_NUSES (label) = 1;
11457 if (align > 1 && count != 0 && (count & 1))
11458 emit_insn (gen_strsetqi (destreg,
11459 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11460 if (align <= 1 || count == 0)
11462 rtx label = ix86_expand_aligntest (countreg, 1);
11463 emit_insn (gen_strsetqi (destreg,
11464 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11465 emit_label (label);
11466 LABEL_NUSES (label) = 1;
11471 /* Expand strlen. */
11473 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11475 rtx addr, scratch1, scratch2, scratch3, scratch4;
11477 /* The generic case of strlen expander is long. Avoid it's
11478 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11480 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11481 && !TARGET_INLINE_ALL_STRINGOPS
11483 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11486 addr = force_reg (Pmode, XEXP (src, 0));
11487 scratch1 = gen_reg_rtx (Pmode);
11489 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11492 /* Well it seems that some optimizer does not combine a call like
11493 foo(strlen(bar), strlen(bar));
11494 when the move and the subtraction is done here. It does calculate
11495 the length just once when these instructions are done inside of
11496 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11497 often used and I use one fewer register for the lifetime of
11498 output_strlen_unroll() this is better. */
11500 emit_move_insn (out, addr);
11502 ix86_expand_strlensi_unroll_1 (out, align);
11504 /* strlensi_unroll_1 returns the address of the zero at the end of
11505 the string, like memchr(), so compute the length by subtracting
11506 the start address. */
11508 emit_insn (gen_subdi3 (out, out, addr));
11510 emit_insn (gen_subsi3 (out, out, addr));
11514 scratch2 = gen_reg_rtx (Pmode);
11515 scratch3 = gen_reg_rtx (Pmode);
11516 scratch4 = force_reg (Pmode, constm1_rtx);
11518 emit_move_insn (scratch3, addr);
11519 eoschar = force_reg (QImode, eoschar);
11521 emit_insn (gen_cld ());
11524 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11525 align, scratch4, scratch3));
11526 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11527 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11531 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11532 align, scratch4, scratch3));
11533 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11534 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11540 /* Expand the appropriate insns for doing strlen if not just doing
11543 out = result, initialized with the start address
11544 align_rtx = alignment of the address.
11545 scratch = scratch register, initialized with the startaddress when
11546 not aligned, otherwise undefined
11548 This is just the body. It needs the initializations mentioned above and
11549 some address computing at the end. These things are done in i386.md. */
11552 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11556 rtx align_2_label = NULL_RTX;
11557 rtx align_3_label = NULL_RTX;
11558 rtx align_4_label = gen_label_rtx ();
11559 rtx end_0_label = gen_label_rtx ();
11561 rtx tmpreg = gen_reg_rtx (SImode);
11562 rtx scratch = gen_reg_rtx (SImode);
11566 if (GET_CODE (align_rtx) == CONST_INT)
11567 align = INTVAL (align_rtx);
11569 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11571 /* Is there a known alignment and is it less than 4? */
11574 rtx scratch1 = gen_reg_rtx (Pmode);
11575 emit_move_insn (scratch1, out);
11576 /* Is there a known alignment and is it not 2? */
11579 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11580 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11582 /* Leave just the 3 lower bits. */
11583 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11584 NULL_RTX, 0, OPTAB_WIDEN);
11586 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11587 Pmode, 1, align_4_label);
11588 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11589 Pmode, 1, align_2_label);
11590 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11591 Pmode, 1, align_3_label);
11595 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11596 check if is aligned to 4 - byte. */
11598 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11599 NULL_RTX, 0, OPTAB_WIDEN);
11601 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11602 Pmode, 1, align_4_label);
11605 mem = gen_rtx_MEM (QImode, out);
11607 /* Now compare the bytes. */
11609 /* Compare the first n unaligned byte on a byte per byte basis. */
11610 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11611 QImode, 1, end_0_label);
11613 /* Increment the address. */
11615 emit_insn (gen_adddi3 (out, out, const1_rtx));
11617 emit_insn (gen_addsi3 (out, out, const1_rtx));
11619 /* Not needed with an alignment of 2 */
11622 emit_label (align_2_label);
11624 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11628 emit_insn (gen_adddi3 (out, out, const1_rtx));
11630 emit_insn (gen_addsi3 (out, out, const1_rtx));
11632 emit_label (align_3_label);
11635 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11639 emit_insn (gen_adddi3 (out, out, const1_rtx));
11641 emit_insn (gen_addsi3 (out, out, const1_rtx));
11644 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11645 align this loop. It gives only huge programs, but does not help to
11647 emit_label (align_4_label);
11649 mem = gen_rtx_MEM (SImode, out);
11650 emit_move_insn (scratch, mem);
11652 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11654 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11656 /* This formula yields a nonzero result iff one of the bytes is zero.
11657 This saves three branches inside loop and many cycles. */
11659 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11660 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11661 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11662 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11663 gen_int_mode (0x80808080, SImode)));
11664 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11669 rtx reg = gen_reg_rtx (SImode);
11670 rtx reg2 = gen_reg_rtx (Pmode);
11671 emit_move_insn (reg, tmpreg);
11672 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11674 /* If zero is not in the first two bytes, move two bytes forward. */
11675 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11676 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11677 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11678 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11679 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11682 /* Emit lea manually to avoid clobbering of flags. */
11683 emit_insn (gen_rtx_SET (SImode, reg2,
11684 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11686 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11687 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11688 emit_insn (gen_rtx_SET (VOIDmode, out,
11689 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11696 rtx end_2_label = gen_label_rtx ();
11697 /* Is zero in the first two bytes? */
11699 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11700 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11701 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11702 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11703 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11705 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11706 JUMP_LABEL (tmp) = end_2_label;
11708 /* Not in the first two. Move two bytes forward. */
11709 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11711 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11713 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11715 emit_label (end_2_label);
11719 /* Avoid branch in fixing the byte. */
11720 tmpreg = gen_lowpart (QImode, tmpreg);
11721 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11722 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11724 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11726 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11728 emit_label (end_0_label);
11732 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11733 rtx callarg2 ATTRIBUTE_UNUSED,
11734 rtx pop, int sibcall)
11736 rtx use = NULL, call;
11738 if (pop == const0_rtx)
11740 if (TARGET_64BIT && pop)
11744 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11745 fnaddr = machopic_indirect_call_target (fnaddr);
11747 /* Static functions and indirect calls don't need the pic register. */
11748 if (! TARGET_64BIT && flag_pic
11749 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11750 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11751 use_reg (&use, pic_offset_table_rtx);
11753 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11755 rtx al = gen_rtx_REG (QImode, 0);
11756 emit_move_insn (al, callarg2);
11757 use_reg (&use, al);
11759 #endif /* TARGET_MACHO */
11761 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11763 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11764 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11766 if (sibcall && TARGET_64BIT
11767 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11770 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11771 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11772 emit_move_insn (fnaddr, addr);
11773 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11776 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11778 call = gen_rtx_SET (VOIDmode, retval, call);
11781 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11782 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11783 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11786 call = emit_call_insn (call);
11788 CALL_INSN_FUNCTION_USAGE (call) = use;
11792 /* Clear stack slot assignments remembered from previous functions.
11793 This is called from INIT_EXPANDERS once before RTL is emitted for each
11796 static struct machine_function *
11797 ix86_init_machine_status (void)
11799 struct machine_function *f;
11801 f = ggc_alloc_cleared (sizeof (struct machine_function));
11802 f->use_fast_prologue_epilogue_nregs = -1;
11807 /* Return a MEM corresponding to a stack slot with mode MODE.
11808 Allocate a new slot if necessary.
11810 The RTL for a function can have several slots available: N is
11811 which slot to use. */
11814 assign_386_stack_local (enum machine_mode mode, int n)
11816 struct stack_local_entry *s;
11818 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11821 for (s = ix86_stack_locals; s; s = s->next)
11822 if (s->mode == mode && s->n == n)
11825 s = (struct stack_local_entry *)
11826 ggc_alloc (sizeof (struct stack_local_entry));
11829 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11831 s->next = ix86_stack_locals;
11832 ix86_stack_locals = s;
11836 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11838 static GTY(()) rtx ix86_tls_symbol;
11840 ix86_tls_get_addr (void)
11843 if (!ix86_tls_symbol)
11845 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11846 (TARGET_GNU_TLS && !TARGET_64BIT)
11847 ? "___tls_get_addr"
11848 : "__tls_get_addr");
11851 return ix86_tls_symbol;
11854 /* Calculate the length of the memory address in the instruction
11855 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11858 memory_address_length (rtx addr)
11860 struct ix86_address parts;
11861 rtx base, index, disp;
11864 if (GET_CODE (addr) == PRE_DEC
11865 || GET_CODE (addr) == POST_INC
11866 || GET_CODE (addr) == PRE_MODIFY
11867 || GET_CODE (addr) == POST_MODIFY)
11870 if (! ix86_decompose_address (addr, &parts))
11874 index = parts.index;
11879 - esp as the base always wants an index,
11880 - ebp as the base always wants a displacement. */
11882 /* Register Indirect. */
11883 if (base && !index && !disp)
11885 /* esp (for its index) and ebp (for its displacement) need
11886 the two-byte modrm form. */
11887 if (addr == stack_pointer_rtx
11888 || addr == arg_pointer_rtx
11889 || addr == frame_pointer_rtx
11890 || addr == hard_frame_pointer_rtx)
11894 /* Direct Addressing. */
11895 else if (disp && !base && !index)
11900 /* Find the length of the displacement constant. */
11903 if (GET_CODE (disp) == CONST_INT
11904 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11910 /* ebp always wants a displacement. */
11911 else if (base == hard_frame_pointer_rtx)
11914 /* An index requires the two-byte modrm form.... */
11916 /* ...like esp, which always wants an index. */
11917 || base == stack_pointer_rtx
11918 || base == arg_pointer_rtx
11919 || base == frame_pointer_rtx)
11926 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11927 is set, expect that insn have 8bit immediate alternative. */
11929 ix86_attr_length_immediate_default (rtx insn, int shortform)
11933 extract_insn_cached (insn);
11934 for (i = recog_data.n_operands - 1; i >= 0; --i)
11935 if (CONSTANT_P (recog_data.operand[i]))
11940 && GET_CODE (recog_data.operand[i]) == CONST_INT
11941 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11945 switch (get_attr_mode (insn))
11956 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11961 fatal_insn ("unknown insn mode", insn);
11967 /* Compute default value for "length_address" attribute. */
11969 ix86_attr_length_address_default (rtx insn)
11973 if (get_attr_type (insn) == TYPE_LEA)
11975 rtx set = PATTERN (insn);
11976 if (GET_CODE (set) == SET)
11978 else if (GET_CODE (set) == PARALLEL
11979 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11980 set = XVECEXP (set, 0, 0);
11983 #ifdef ENABLE_CHECKING
11989 return memory_address_length (SET_SRC (set));
11992 extract_insn_cached (insn);
11993 for (i = recog_data.n_operands - 1; i >= 0; --i)
11994 if (GET_CODE (recog_data.operand[i]) == MEM)
11996 return memory_address_length (XEXP (recog_data.operand[i], 0));
12002 /* Return the maximum number of instructions a cpu can issue. */
12005 ix86_issue_rate (void)
12009 case PROCESSOR_PENTIUM:
12013 case PROCESSOR_PENTIUMPRO:
12014 case PROCESSOR_PENTIUM4:
12015 case PROCESSOR_ATHLON:
12024 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12025 by DEP_INSN and nothing set by DEP_INSN. */
12028 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12032 /* Simplify the test for uninteresting insns. */
12033 if (insn_type != TYPE_SETCC
12034 && insn_type != TYPE_ICMOV
12035 && insn_type != TYPE_FCMOV
12036 && insn_type != TYPE_IBR)
12039 if ((set = single_set (dep_insn)) != 0)
12041 set = SET_DEST (set);
12044 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12045 && XVECLEN (PATTERN (dep_insn), 0) == 2
12046 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12047 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12049 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12050 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12055 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12058 /* This test is true if the dependent insn reads the flags but
12059 not any other potentially set register. */
12060 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12063 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12069 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12070 address with operands set by DEP_INSN. */
12073 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12077 if (insn_type == TYPE_LEA
12080 addr = PATTERN (insn);
12081 if (GET_CODE (addr) == SET)
12083 else if (GET_CODE (addr) == PARALLEL
12084 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12085 addr = XVECEXP (addr, 0, 0);
12088 addr = SET_SRC (addr);
12093 extract_insn_cached (insn);
12094 for (i = recog_data.n_operands - 1; i >= 0; --i)
12095 if (GET_CODE (recog_data.operand[i]) == MEM)
12097 addr = XEXP (recog_data.operand[i], 0);
12104 return modified_in_p (addr, dep_insn);
12108 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12110 enum attr_type insn_type, dep_insn_type;
12111 enum attr_memory memory, dep_memory;
12113 int dep_insn_code_number;
12115 /* Anti and output dependencies have zero cost on all CPUs. */
12116 if (REG_NOTE_KIND (link) != 0)
12119 dep_insn_code_number = recog_memoized (dep_insn);
12121 /* If we can't recognize the insns, we can't really do anything. */
12122 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12125 insn_type = get_attr_type (insn);
12126 dep_insn_type = get_attr_type (dep_insn);
12130 case PROCESSOR_PENTIUM:
12131 /* Address Generation Interlock adds a cycle of latency. */
12132 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12135 /* ??? Compares pair with jump/setcc. */
12136 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12139 /* Floating point stores require value to be ready one cycle earlier. */
12140 if (insn_type == TYPE_FMOV
12141 && get_attr_memory (insn) == MEMORY_STORE
12142 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12146 case PROCESSOR_PENTIUMPRO:
12147 memory = get_attr_memory (insn);
12148 dep_memory = get_attr_memory (dep_insn);
12150 /* Since we can't represent delayed latencies of load+operation,
12151 increase the cost here for non-imov insns. */
12152 if (dep_insn_type != TYPE_IMOV
12153 && dep_insn_type != TYPE_FMOV
12154 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12157 /* INT->FP conversion is expensive. */
12158 if (get_attr_fp_int_src (dep_insn))
12161 /* There is one cycle extra latency between an FP op and a store. */
12162 if (insn_type == TYPE_FMOV
12163 && (set = single_set (dep_insn)) != NULL_RTX
12164 && (set2 = single_set (insn)) != NULL_RTX
12165 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12166 && GET_CODE (SET_DEST (set2)) == MEM)
12169 /* Show ability of reorder buffer to hide latency of load by executing
12170 in parallel with previous instruction in case
12171 previous instruction is not needed to compute the address. */
12172 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12173 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12175 /* Claim moves to take one cycle, as core can issue one load
12176 at time and the next load can start cycle later. */
12177 if (dep_insn_type == TYPE_IMOV
12178 || dep_insn_type == TYPE_FMOV)
12186 memory = get_attr_memory (insn);
12187 dep_memory = get_attr_memory (dep_insn);
12188 /* The esp dependency is resolved before the instruction is really
12190 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12191 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12194 /* Since we can't represent delayed latencies of load+operation,
12195 increase the cost here for non-imov insns. */
12196 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12197 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12199 /* INT->FP conversion is expensive. */
12200 if (get_attr_fp_int_src (dep_insn))
12203 /* Show ability of reorder buffer to hide latency of load by executing
12204 in parallel with previous instruction in case
12205 previous instruction is not needed to compute the address. */
12206 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12207 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12209 /* Claim moves to take one cycle, as core can issue one load
12210 at time and the next load can start cycle later. */
12211 if (dep_insn_type == TYPE_IMOV
12212 || dep_insn_type == TYPE_FMOV)
12221 case PROCESSOR_ATHLON:
12223 memory = get_attr_memory (insn);
12224 dep_memory = get_attr_memory (dep_insn);
12226 /* Show ability of reorder buffer to hide latency of load by executing
12227 in parallel with previous instruction in case
12228 previous instruction is not needed to compute the address. */
12229 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12230 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12232 enum attr_unit unit = get_attr_unit (insn);
12235 /* Because of the difference between the length of integer and
12236 floating unit pipeline preparation stages, the memory operands
12237 for floating point are cheaper.
12239 ??? For Athlon it the difference is most probably 2. */
12240 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12243 loadcost = TARGET_ATHLON ? 2 : 0;
12245 if (cost >= loadcost)
12260 struct ppro_sched_data
12263 int issued_this_cycle;
12267 static enum attr_ppro_uops
12268 ix86_safe_ppro_uops (rtx insn)
12270 if (recog_memoized (insn) >= 0)
12271 return get_attr_ppro_uops (insn);
12273 return PPRO_UOPS_MANY;
12277 ix86_dump_ppro_packet (FILE *dump)
12279 if (ix86_sched_data.ppro.decode[0])
12281 fprintf (dump, "PPRO packet: %d",
12282 INSN_UID (ix86_sched_data.ppro.decode[0]));
12283 if (ix86_sched_data.ppro.decode[1])
12284 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12285 if (ix86_sched_data.ppro.decode[2])
12286 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12287 fputc ('\n', dump);
12291 /* We're beginning a new block. Initialize data structures as necessary. */
12294 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12295 int sched_verbose ATTRIBUTE_UNUSED,
12296 int veclen ATTRIBUTE_UNUSED)
12298 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12301 /* Shift INSN to SLOT, and shift everything else down. */
12304 ix86_reorder_insn (rtx *insnp, rtx *slot)
12310 insnp[0] = insnp[1];
12311 while (++insnp != slot);
12317 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12320 enum attr_ppro_uops cur_uops;
12321 int issued_this_cycle;
12325 /* At this point .ppro.decode contains the state of the three
12326 decoders from last "cycle". That is, those insns that were
12327 actually independent. But here we're scheduling for the
12328 decoder, and we may find things that are decodable in the
12331 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12332 issued_this_cycle = 0;
12335 cur_uops = ix86_safe_ppro_uops (*insnp);
12337 /* If the decoders are empty, and we've a complex insn at the
12338 head of the priority queue, let it issue without complaint. */
12339 if (decode[0] == NULL)
12341 if (cur_uops == PPRO_UOPS_MANY)
12343 decode[0] = *insnp;
12347 /* Otherwise, search for a 2-4 uop unsn to issue. */
12348 while (cur_uops != PPRO_UOPS_FEW)
12350 if (insnp == ready)
12352 cur_uops = ix86_safe_ppro_uops (*--insnp);
12355 /* If so, move it to the head of the line. */
12356 if (cur_uops == PPRO_UOPS_FEW)
12357 ix86_reorder_insn (insnp, e_ready);
12359 /* Issue the head of the queue. */
12360 issued_this_cycle = 1;
12361 decode[0] = *e_ready--;
12364 /* Look for simple insns to fill in the other two slots. */
12365 for (i = 1; i < 3; ++i)
12366 if (decode[i] == NULL)
12368 if (ready > e_ready)
12372 cur_uops = ix86_safe_ppro_uops (*insnp);
12373 while (cur_uops != PPRO_UOPS_ONE)
12375 if (insnp == ready)
12377 cur_uops = ix86_safe_ppro_uops (*--insnp);
12380 /* Found one. Move it to the head of the queue and issue it. */
12381 if (cur_uops == PPRO_UOPS_ONE)
12383 ix86_reorder_insn (insnp, e_ready);
12384 decode[i] = *e_ready--;
12385 issued_this_cycle++;
12389 /* ??? Didn't find one. Ideally, here we would do a lazy split
12390 of 2-uop insns, issue one and queue the other. */
12394 if (issued_this_cycle == 0)
12395 issued_this_cycle = 1;
12396 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12399 /* We are about to being issuing insns for this clock cycle.
12400 Override the default sort algorithm to better slot instructions. */
12402 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12403 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12404 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12406 int n_ready = *n_readyp;
12407 rtx *e_ready = ready + n_ready - 1;
12409 /* Make sure to go ahead and initialize key items in
12410 ix86_sched_data if we are not going to bother trying to
12411 reorder the ready queue. */
12414 ix86_sched_data.ppro.issued_this_cycle = 1;
12423 case PROCESSOR_PENTIUMPRO:
12424 ix86_sched_reorder_ppro (ready, e_ready);
12429 return ix86_issue_rate ();
12432 /* We are about to issue INSN. Return the number of insns left on the
12433 ready queue that can be issued this cycle. */
12436 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12437 int can_issue_more)
12443 return can_issue_more - 1;
12445 case PROCESSOR_PENTIUMPRO:
12447 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12449 if (uops == PPRO_UOPS_MANY)
12452 ix86_dump_ppro_packet (dump);
12453 ix86_sched_data.ppro.decode[0] = insn;
12454 ix86_sched_data.ppro.decode[1] = NULL;
12455 ix86_sched_data.ppro.decode[2] = NULL;
12457 ix86_dump_ppro_packet (dump);
12458 ix86_sched_data.ppro.decode[0] = NULL;
12460 else if (uops == PPRO_UOPS_FEW)
12463 ix86_dump_ppro_packet (dump);
12464 ix86_sched_data.ppro.decode[0] = insn;
12465 ix86_sched_data.ppro.decode[1] = NULL;
12466 ix86_sched_data.ppro.decode[2] = NULL;
12470 for (i = 0; i < 3; ++i)
12471 if (ix86_sched_data.ppro.decode[i] == NULL)
12473 ix86_sched_data.ppro.decode[i] = insn;
12481 ix86_dump_ppro_packet (dump);
12482 ix86_sched_data.ppro.decode[0] = NULL;
12483 ix86_sched_data.ppro.decode[1] = NULL;
12484 ix86_sched_data.ppro.decode[2] = NULL;
12488 return --ix86_sched_data.ppro.issued_this_cycle;
12493 ia32_use_dfa_pipeline_interface (void)
12495 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12500 /* How many alternative schedules to try. This should be as wide as the
12501 scheduling freedom in the DFA, but no wider. Making this value too
12502 large results extra work for the scheduler. */
12505 ia32_multipass_dfa_lookahead (void)
12507 if (ix86_tune == PROCESSOR_PENTIUM)
12514 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12515 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12519 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12524 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12526 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12530 /* Subroutine of above to actually do the updating by recursively walking
12534 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12537 enum rtx_code code = GET_CODE (x);
12538 const char *format_ptr = GET_RTX_FORMAT (code);
12541 if (code == MEM && XEXP (x, 0) == dstreg)
12542 MEM_COPY_ATTRIBUTES (x, dstref);
12543 else if (code == MEM && XEXP (x, 0) == srcreg)
12544 MEM_COPY_ATTRIBUTES (x, srcref);
12546 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12548 if (*format_ptr == 'e')
12549 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12551 else if (*format_ptr == 'E')
12552 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12553 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12558 /* Compute the alignment given to a constant that is being placed in memory.
12559 EXP is the constant and ALIGN is the alignment that the object would
12561 The value of this function is used instead of that alignment to align
12565 ix86_constant_alignment (tree exp, int align)
12567 if (TREE_CODE (exp) == REAL_CST)
12569 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12571 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12574 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12575 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12576 return BITS_PER_WORD;
12581 /* Compute the alignment for a static variable.
12582 TYPE is the data type, and ALIGN is the alignment that
12583 the object would ordinarily have. The value of this function is used
12584 instead of that alignment to align the object. */
12587 ix86_data_alignment (tree type, int align)
12589 if (AGGREGATE_TYPE_P (type)
12590 && TYPE_SIZE (type)
12591 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12592 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12593 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12596 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12597 to 16byte boundary. */
12600 if (AGGREGATE_TYPE_P (type)
12601 && TYPE_SIZE (type)
12602 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12603 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12604 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12608 if (TREE_CODE (type) == ARRAY_TYPE)
12610 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12612 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12615 else if (TREE_CODE (type) == COMPLEX_TYPE)
12618 if (TYPE_MODE (type) == DCmode && align < 64)
12620 if (TYPE_MODE (type) == XCmode && align < 128)
12623 else if ((TREE_CODE (type) == RECORD_TYPE
12624 || TREE_CODE (type) == UNION_TYPE
12625 || TREE_CODE (type) == QUAL_UNION_TYPE)
12626 && TYPE_FIELDS (type))
12628 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12630 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12633 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12634 || TREE_CODE (type) == INTEGER_TYPE)
12636 if (TYPE_MODE (type) == DFmode && align < 64)
12638 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12645 /* Compute the alignment for a local variable.
12646 TYPE is the data type, and ALIGN is the alignment that
12647 the object would ordinarily have. The value of this macro is used
12648 instead of that alignment to align the object. */
12651 ix86_local_alignment (tree type, int align)
12653 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12654 to 16byte boundary. */
12657 if (AGGREGATE_TYPE_P (type)
12658 && TYPE_SIZE (type)
12659 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12660 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12661 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12664 if (TREE_CODE (type) == ARRAY_TYPE)
12666 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12668 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12671 else if (TREE_CODE (type) == COMPLEX_TYPE)
12673 if (TYPE_MODE (type) == DCmode && align < 64)
12675 if (TYPE_MODE (type) == XCmode && align < 128)
12678 else if ((TREE_CODE (type) == RECORD_TYPE
12679 || TREE_CODE (type) == UNION_TYPE
12680 || TREE_CODE (type) == QUAL_UNION_TYPE)
12681 && TYPE_FIELDS (type))
12683 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12685 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12688 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12689 || TREE_CODE (type) == INTEGER_TYPE)
12692 if (TYPE_MODE (type) == DFmode && align < 64)
12694 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12700 /* Emit RTL insns to initialize the variable parts of a trampoline.
12701 FNADDR is an RTX for the address of the function's pure code.
12702 CXT is an RTX for the static chain value for the function. */
12704 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12708 /* Compute offset from the end of the jmp to the target function. */
12709 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12710 plus_constant (tramp, 10),
12711 NULL_RTX, 1, OPTAB_DIRECT);
12712 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12713 gen_int_mode (0xb9, QImode));
12714 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12715 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12716 gen_int_mode (0xe9, QImode));
12717 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12722 /* Try to load address using shorter movl instead of movabs.
12723 We may want to support movq for kernel mode, but kernel does not use
12724 trampolines at the moment. */
12725 if (x86_64_zero_extended_value (fnaddr))
12727 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12728 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12729 gen_int_mode (0xbb41, HImode));
12730 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12731 gen_lowpart (SImode, fnaddr));
12736 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12737 gen_int_mode (0xbb49, HImode));
12738 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12742 /* Load static chain using movabs to r10. */
12743 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12744 gen_int_mode (0xba49, HImode));
12745 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12748 /* Jump to the r11 */
12749 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12750 gen_int_mode (0xff49, HImode));
12751 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12752 gen_int_mode (0xe3, QImode));
12754 if (offset > TRAMPOLINE_SIZE)
12758 #ifdef TRANSFER_FROM_TRAMPOLINE
12759 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12760 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12764 #define def_builtin(MASK, NAME, TYPE, CODE) \
12766 if ((MASK) & target_flags \
12767 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12768 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12769 NULL, NULL_TREE); \
12772 struct builtin_description
12774 const unsigned int mask;
12775 const enum insn_code icode;
12776 const char *const name;
12777 const enum ix86_builtins code;
12778 const enum rtx_code comparison;
12779 const unsigned int flag;
12782 static const struct builtin_description bdesc_comi[] =
12784 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12785 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12786 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12787 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12788 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12789 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12790 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12791 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12792 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12793 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12794 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12795 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12796 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12797 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12798 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12810 static const struct builtin_description bdesc_2arg[] =
12813 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12814 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12815 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12816 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12817 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12818 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12819 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12820 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12822 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12823 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12824 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12825 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12826 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12827 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12828 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12829 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12830 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12831 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12832 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12833 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12834 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12835 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12836 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12837 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12838 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12839 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12840 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12841 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12843 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12844 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12845 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12846 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12848 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12849 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12850 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12851 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12853 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12854 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12855 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12856 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12857 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12860 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12861 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12862 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12863 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12864 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12865 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12866 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12867 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12869 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12870 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12871 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12872 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12873 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12874 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12875 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12876 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12878 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12879 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12880 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12882 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12883 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12884 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12885 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12887 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12888 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12890 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12891 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12892 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12893 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12894 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12895 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12897 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12898 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12899 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12900 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12902 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12903 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12904 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12905 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12906 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12910 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12914 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12915 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12916 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12918 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12919 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12920 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12921 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12922 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12923 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12925 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12926 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12927 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12928 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12929 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12930 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12932 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12933 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12934 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12935 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12937 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12938 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12951 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12952 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12953 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12954 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12955 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12956 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12957 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12958 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12959 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12960 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12961 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12962 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12963 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12964 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12965 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12966 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12967 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12968 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12969 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12971 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12995 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12996 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12997 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12998 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12999 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13000 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13001 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13002 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13067 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13072 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13073 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13074 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13075 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13076 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13077 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13080 static const struct builtin_description bdesc_1arg[] =
13082 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13083 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13085 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13086 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13087 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13089 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13090 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13091 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13092 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13093 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13094 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13097 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13098 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13099 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13104 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13115 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13116 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13117 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13120 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13121 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13126 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13127 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13128 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13132 ix86_init_builtins (void)
13135 ix86_init_mmx_sse_builtins ();
13138 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13139 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13142 ix86_init_mmx_sse_builtins (void)
13144 const struct builtin_description * d;
13147 tree pchar_type_node = build_pointer_type (char_type_node);
13148 tree pcchar_type_node = build_pointer_type (
13149 build_type_variant (char_type_node, 1, 0));
13150 tree pfloat_type_node = build_pointer_type (float_type_node);
13151 tree pcfloat_type_node = build_pointer_type (
13152 build_type_variant (float_type_node, 1, 0));
13153 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13154 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13155 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13158 tree int_ftype_v4sf_v4sf
13159 = build_function_type_list (integer_type_node,
13160 V4SF_type_node, V4SF_type_node, NULL_TREE);
13161 tree v4si_ftype_v4sf_v4sf
13162 = build_function_type_list (V4SI_type_node,
13163 V4SF_type_node, V4SF_type_node, NULL_TREE);
13164 /* MMX/SSE/integer conversions. */
13165 tree int_ftype_v4sf
13166 = build_function_type_list (integer_type_node,
13167 V4SF_type_node, NULL_TREE);
13168 tree int64_ftype_v4sf
13169 = build_function_type_list (long_long_integer_type_node,
13170 V4SF_type_node, NULL_TREE);
13171 tree int_ftype_v8qi
13172 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13173 tree v4sf_ftype_v4sf_int
13174 = build_function_type_list (V4SF_type_node,
13175 V4SF_type_node, integer_type_node, NULL_TREE);
13176 tree v4sf_ftype_v4sf_int64
13177 = build_function_type_list (V4SF_type_node,
13178 V4SF_type_node, long_long_integer_type_node,
13180 tree v4sf_ftype_v4sf_v2si
13181 = build_function_type_list (V4SF_type_node,
13182 V4SF_type_node, V2SI_type_node, NULL_TREE);
13183 tree int_ftype_v4hi_int
13184 = build_function_type_list (integer_type_node,
13185 V4HI_type_node, integer_type_node, NULL_TREE);
13186 tree v4hi_ftype_v4hi_int_int
13187 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13188 integer_type_node, integer_type_node,
13190 /* Miscellaneous. */
13191 tree v8qi_ftype_v4hi_v4hi
13192 = build_function_type_list (V8QI_type_node,
13193 V4HI_type_node, V4HI_type_node, NULL_TREE);
13194 tree v4hi_ftype_v2si_v2si
13195 = build_function_type_list (V4HI_type_node,
13196 V2SI_type_node, V2SI_type_node, NULL_TREE);
13197 tree v4sf_ftype_v4sf_v4sf_int
13198 = build_function_type_list (V4SF_type_node,
13199 V4SF_type_node, V4SF_type_node,
13200 integer_type_node, NULL_TREE);
13201 tree v2si_ftype_v4hi_v4hi
13202 = build_function_type_list (V2SI_type_node,
13203 V4HI_type_node, V4HI_type_node, NULL_TREE);
13204 tree v4hi_ftype_v4hi_int
13205 = build_function_type_list (V4HI_type_node,
13206 V4HI_type_node, integer_type_node, NULL_TREE);
13207 tree v4hi_ftype_v4hi_di
13208 = build_function_type_list (V4HI_type_node,
13209 V4HI_type_node, long_long_unsigned_type_node,
13211 tree v2si_ftype_v2si_di
13212 = build_function_type_list (V2SI_type_node,
13213 V2SI_type_node, long_long_unsigned_type_node,
13215 tree void_ftype_void
13216 = build_function_type (void_type_node, void_list_node);
13217 tree void_ftype_unsigned
13218 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13219 tree void_ftype_unsigned_unsigned
13220 = build_function_type_list (void_type_node, unsigned_type_node,
13221 unsigned_type_node, NULL_TREE);
13222 tree void_ftype_pcvoid_unsigned_unsigned
13223 = build_function_type_list (void_type_node, const_ptr_type_node,
13224 unsigned_type_node, unsigned_type_node,
13226 tree unsigned_ftype_void
13227 = build_function_type (unsigned_type_node, void_list_node);
13229 = build_function_type (long_long_unsigned_type_node, void_list_node);
13230 tree v4sf_ftype_void
13231 = build_function_type (V4SF_type_node, void_list_node);
13232 tree v2si_ftype_v4sf
13233 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13234 /* Loads/stores. */
13235 tree void_ftype_v8qi_v8qi_pchar
13236 = build_function_type_list (void_type_node,
13237 V8QI_type_node, V8QI_type_node,
13238 pchar_type_node, NULL_TREE);
13239 tree v4sf_ftype_pcfloat
13240 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13241 /* @@@ the type is bogus */
13242 tree v4sf_ftype_v4sf_pv2si
13243 = build_function_type_list (V4SF_type_node,
13244 V4SF_type_node, pv2si_type_node, NULL_TREE);
13245 tree void_ftype_pv2si_v4sf
13246 = build_function_type_list (void_type_node,
13247 pv2si_type_node, V4SF_type_node, NULL_TREE);
13248 tree void_ftype_pfloat_v4sf
13249 = build_function_type_list (void_type_node,
13250 pfloat_type_node, V4SF_type_node, NULL_TREE);
13251 tree void_ftype_pdi_di
13252 = build_function_type_list (void_type_node,
13253 pdi_type_node, long_long_unsigned_type_node,
13255 tree void_ftype_pv2di_v2di
13256 = build_function_type_list (void_type_node,
13257 pv2di_type_node, V2DI_type_node, NULL_TREE);
13258 /* Normal vector unops. */
13259 tree v4sf_ftype_v4sf
13260 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13262 /* Normal vector binops. */
13263 tree v4sf_ftype_v4sf_v4sf
13264 = build_function_type_list (V4SF_type_node,
13265 V4SF_type_node, V4SF_type_node, NULL_TREE);
13266 tree v8qi_ftype_v8qi_v8qi
13267 = build_function_type_list (V8QI_type_node,
13268 V8QI_type_node, V8QI_type_node, NULL_TREE);
13269 tree v4hi_ftype_v4hi_v4hi
13270 = build_function_type_list (V4HI_type_node,
13271 V4HI_type_node, V4HI_type_node, NULL_TREE);
13272 tree v2si_ftype_v2si_v2si
13273 = build_function_type_list (V2SI_type_node,
13274 V2SI_type_node, V2SI_type_node, NULL_TREE);
13275 tree di_ftype_di_di
13276 = build_function_type_list (long_long_unsigned_type_node,
13277 long_long_unsigned_type_node,
13278 long_long_unsigned_type_node, NULL_TREE);
13280 tree v2si_ftype_v2sf
13281 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13282 tree v2sf_ftype_v2si
13283 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13284 tree v2si_ftype_v2si
13285 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13286 tree v2sf_ftype_v2sf
13287 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13288 tree v2sf_ftype_v2sf_v2sf
13289 = build_function_type_list (V2SF_type_node,
13290 V2SF_type_node, V2SF_type_node, NULL_TREE);
13291 tree v2si_ftype_v2sf_v2sf
13292 = build_function_type_list (V2SI_type_node,
13293 V2SF_type_node, V2SF_type_node, NULL_TREE);
13294 tree pint_type_node = build_pointer_type (integer_type_node);
13295 tree pcint_type_node = build_pointer_type (
13296 build_type_variant (integer_type_node, 1, 0));
13297 tree pdouble_type_node = build_pointer_type (double_type_node);
13298 tree pcdouble_type_node = build_pointer_type (
13299 build_type_variant (double_type_node, 1, 0));
13300 tree int_ftype_v2df_v2df
13301 = build_function_type_list (integer_type_node,
13302 V2DF_type_node, V2DF_type_node, NULL_TREE);
13305 = build_function_type (intTI_type_node, void_list_node);
13306 tree v2di_ftype_void
13307 = build_function_type (V2DI_type_node, void_list_node);
13308 tree ti_ftype_ti_ti
13309 = build_function_type_list (intTI_type_node,
13310 intTI_type_node, intTI_type_node, NULL_TREE);
13311 tree void_ftype_pcvoid
13312 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13314 = build_function_type_list (V2DI_type_node,
13315 long_long_unsigned_type_node, NULL_TREE);
13317 = build_function_type_list (long_long_unsigned_type_node,
13318 V2DI_type_node, NULL_TREE);
13319 tree v4sf_ftype_v4si
13320 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13321 tree v4si_ftype_v4sf
13322 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13323 tree v2df_ftype_v4si
13324 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13325 tree v4si_ftype_v2df
13326 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13327 tree v2si_ftype_v2df
13328 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13329 tree v4sf_ftype_v2df
13330 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13331 tree v2df_ftype_v2si
13332 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13333 tree v2df_ftype_v4sf
13334 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13335 tree int_ftype_v2df
13336 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13337 tree int64_ftype_v2df
13338 = build_function_type_list (long_long_integer_type_node,
13339 V2DF_type_node, NULL_TREE);
13340 tree v2df_ftype_v2df_int
13341 = build_function_type_list (V2DF_type_node,
13342 V2DF_type_node, integer_type_node, NULL_TREE);
13343 tree v2df_ftype_v2df_int64
13344 = build_function_type_list (V2DF_type_node,
13345 V2DF_type_node, long_long_integer_type_node,
13347 tree v4sf_ftype_v4sf_v2df
13348 = build_function_type_list (V4SF_type_node,
13349 V4SF_type_node, V2DF_type_node, NULL_TREE);
13350 tree v2df_ftype_v2df_v4sf
13351 = build_function_type_list (V2DF_type_node,
13352 V2DF_type_node, V4SF_type_node, NULL_TREE);
13353 tree v2df_ftype_v2df_v2df_int
13354 = build_function_type_list (V2DF_type_node,
13355 V2DF_type_node, V2DF_type_node,
13358 tree v2df_ftype_v2df_pv2si
13359 = build_function_type_list (V2DF_type_node,
13360 V2DF_type_node, pv2si_type_node, NULL_TREE);
13361 tree void_ftype_pv2si_v2df
13362 = build_function_type_list (void_type_node,
13363 pv2si_type_node, V2DF_type_node, NULL_TREE);
13364 tree void_ftype_pdouble_v2df
13365 = build_function_type_list (void_type_node,
13366 pdouble_type_node, V2DF_type_node, NULL_TREE);
13367 tree void_ftype_pint_int
13368 = build_function_type_list (void_type_node,
13369 pint_type_node, integer_type_node, NULL_TREE);
13370 tree void_ftype_v16qi_v16qi_pchar
13371 = build_function_type_list (void_type_node,
13372 V16QI_type_node, V16QI_type_node,
13373 pchar_type_node, NULL_TREE);
13374 tree v2df_ftype_pcdouble
13375 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13376 tree v2df_ftype_v2df_v2df
13377 = build_function_type_list (V2DF_type_node,
13378 V2DF_type_node, V2DF_type_node, NULL_TREE);
13379 tree v16qi_ftype_v16qi_v16qi
13380 = build_function_type_list (V16QI_type_node,
13381 V16QI_type_node, V16QI_type_node, NULL_TREE);
13382 tree v8hi_ftype_v8hi_v8hi
13383 = build_function_type_list (V8HI_type_node,
13384 V8HI_type_node, V8HI_type_node, NULL_TREE);
13385 tree v4si_ftype_v4si_v4si
13386 = build_function_type_list (V4SI_type_node,
13387 V4SI_type_node, V4SI_type_node, NULL_TREE);
13388 tree v2di_ftype_v2di_v2di
13389 = build_function_type_list (V2DI_type_node,
13390 V2DI_type_node, V2DI_type_node, NULL_TREE);
13391 tree v2di_ftype_v2df_v2df
13392 = build_function_type_list (V2DI_type_node,
13393 V2DF_type_node, V2DF_type_node, NULL_TREE);
13394 tree v2df_ftype_v2df
13395 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13396 tree v2df_ftype_double
13397 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13398 tree v2df_ftype_double_double
13399 = build_function_type_list (V2DF_type_node,
13400 double_type_node, double_type_node, NULL_TREE);
13401 tree int_ftype_v8hi_int
13402 = build_function_type_list (integer_type_node,
13403 V8HI_type_node, integer_type_node, NULL_TREE);
13404 tree v8hi_ftype_v8hi_int_int
13405 = build_function_type_list (V8HI_type_node,
13406 V8HI_type_node, integer_type_node,
13407 integer_type_node, NULL_TREE);
13408 tree v2di_ftype_v2di_int
13409 = build_function_type_list (V2DI_type_node,
13410 V2DI_type_node, integer_type_node, NULL_TREE);
13411 tree v4si_ftype_v4si_int
13412 = build_function_type_list (V4SI_type_node,
13413 V4SI_type_node, integer_type_node, NULL_TREE);
13414 tree v8hi_ftype_v8hi_int
13415 = build_function_type_list (V8HI_type_node,
13416 V8HI_type_node, integer_type_node, NULL_TREE);
13417 tree v8hi_ftype_v8hi_v2di
13418 = build_function_type_list (V8HI_type_node,
13419 V8HI_type_node, V2DI_type_node, NULL_TREE);
13420 tree v4si_ftype_v4si_v2di
13421 = build_function_type_list (V4SI_type_node,
13422 V4SI_type_node, V2DI_type_node, NULL_TREE);
13423 tree v4si_ftype_v8hi_v8hi
13424 = build_function_type_list (V4SI_type_node,
13425 V8HI_type_node, V8HI_type_node, NULL_TREE);
13426 tree di_ftype_v8qi_v8qi
13427 = build_function_type_list (long_long_unsigned_type_node,
13428 V8QI_type_node, V8QI_type_node, NULL_TREE);
13429 tree v2di_ftype_v16qi_v16qi
13430 = build_function_type_list (V2DI_type_node,
13431 V16QI_type_node, V16QI_type_node, NULL_TREE);
13432 tree int_ftype_v16qi
13433 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13434 tree v16qi_ftype_pcchar
13435 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13436 tree void_ftype_pchar_v16qi
13437 = build_function_type_list (void_type_node,
13438 pchar_type_node, V16QI_type_node, NULL_TREE);
13439 tree v4si_ftype_pcint
13440 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13441 tree void_ftype_pcint_v4si
13442 = build_function_type_list (void_type_node,
13443 pcint_type_node, V4SI_type_node, NULL_TREE);
13444 tree v2di_ftype_v2di
13445 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13448 tree float128_type;
13450 /* The __float80 type. */
13451 if (TYPE_MODE (long_double_type_node) == XFmode)
13452 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13456 /* The __float80 type. */
13457 float80_type = make_node (REAL_TYPE);
13458 TYPE_PRECISION (float80_type) = 96;
13459 layout_type (float80_type);
13460 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13463 float128_type = make_node (REAL_TYPE);
13464 TYPE_PRECISION (float128_type) = 128;
13465 layout_type (float128_type);
13466 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13468 /* Add all builtins that are more or less simple operations on two
13470 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13472 /* Use one of the operands; the target can have a different mode for
13473 mask-generating compares. */
13474 enum machine_mode mode;
13479 mode = insn_data[d->icode].operand[1].mode;
13484 type = v16qi_ftype_v16qi_v16qi;
13487 type = v8hi_ftype_v8hi_v8hi;
13490 type = v4si_ftype_v4si_v4si;
13493 type = v2di_ftype_v2di_v2di;
13496 type = v2df_ftype_v2df_v2df;
13499 type = ti_ftype_ti_ti;
13502 type = v4sf_ftype_v4sf_v4sf;
13505 type = v8qi_ftype_v8qi_v8qi;
13508 type = v4hi_ftype_v4hi_v4hi;
13511 type = v2si_ftype_v2si_v2si;
13514 type = di_ftype_di_di;
13521 /* Override for comparisons. */
13522 if (d->icode == CODE_FOR_maskcmpv4sf3
13523 || d->icode == CODE_FOR_maskncmpv4sf3
13524 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13525 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13526 type = v4si_ftype_v4sf_v4sf;
13528 if (d->icode == CODE_FOR_maskcmpv2df3
13529 || d->icode == CODE_FOR_maskncmpv2df3
13530 || d->icode == CODE_FOR_vmmaskcmpv2df3
13531 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13532 type = v2di_ftype_v2df_v2df;
13534 def_builtin (d->mask, d->name, type, d->code);
13537 /* Add the remaining MMX insns with somewhat more complicated types. */
13538 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13539 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13540 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13541 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13542 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13544 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13545 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13546 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13548 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13549 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13551 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13552 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13554 /* comi/ucomi insns. */
13555 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13556 if (d->mask == MASK_SSE2)
13557 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13559 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13561 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13562 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13563 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13565 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13566 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13567 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13568 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13569 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13570 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13571 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13572 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13573 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13574 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13575 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13577 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13578 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13580 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13582 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13583 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13584 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13585 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13586 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13587 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13589 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13590 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13591 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13592 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13594 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13595 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13596 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13597 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13599 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13601 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13603 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13604 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13605 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13606 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13607 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13608 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13610 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13612 /* Original 3DNow! */
13613 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13614 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13615 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13616 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13617 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13618 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13619 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13620 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13621 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13622 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13623 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13624 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13625 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13626 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13631 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13632 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13634 /* 3DNow! extension as used in the Athlon CPU. */
13635 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13636 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13637 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13638 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13639 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13640 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13642 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13645 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13646 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13648 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13693 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13694 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13697 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13701 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13703 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13705 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13706 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13708 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13709 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13714 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13718 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13721 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13725 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13727 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13728 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13735 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13739 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13740 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13751 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13753 /* Prescott New Instructions. */
13754 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13755 void_ftype_pcvoid_unsigned_unsigned,
13756 IX86_BUILTIN_MONITOR);
13757 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13758 void_ftype_unsigned_unsigned,
13759 IX86_BUILTIN_MWAIT);
13760 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13762 IX86_BUILTIN_MOVSHDUP);
13763 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13765 IX86_BUILTIN_MOVSLDUP);
13766 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13767 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13768 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13769 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13770 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13771 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13774 /* Errors in the source file can cause expand_expr to return const0_rtx
13775 where we expect a vector. To avoid crashing, use one of the vector
13776 clear instructions. */
13778 safe_vector_operand (rtx x, enum machine_mode mode)
13780 if (x != const0_rtx)
13782 x = gen_reg_rtx (mode);
13784 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13785 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13786 : gen_rtx_SUBREG (DImode, x, 0)));
13788 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13789 : gen_rtx_SUBREG (V4SFmode, x, 0),
13790 CONST0_RTX (V4SFmode)));
13794 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13797 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13800 tree arg0 = TREE_VALUE (arglist);
13801 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13802 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13803 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13804 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13805 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13806 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13808 if (VECTOR_MODE_P (mode0))
13809 op0 = safe_vector_operand (op0, mode0);
13810 if (VECTOR_MODE_P (mode1))
13811 op1 = safe_vector_operand (op1, mode1);
13814 || GET_MODE (target) != tmode
13815 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13816 target = gen_reg_rtx (tmode);
13818 if (GET_MODE (op1) == SImode && mode1 == TImode)
13820 rtx x = gen_reg_rtx (V4SImode);
13821 emit_insn (gen_sse2_loadd (x, op1));
13822 op1 = gen_lowpart (TImode, x);
13825 /* In case the insn wants input operands in modes different from
13826 the result, abort. */
13827 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13828 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13832 op0 = copy_to_mode_reg (mode0, op0);
13833 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13834 op1 = copy_to_mode_reg (mode1, op1);
13836 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13837 yet one of the two must not be a memory. This is normally enforced
13838 by expanders, but we didn't bother to create one here. */
13839 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13840 op0 = copy_to_mode_reg (mode0, op0);
13842 pat = GEN_FCN (icode) (target, op0, op1);
13849 /* Subroutine of ix86_expand_builtin to take care of stores. */
13852 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13855 tree arg0 = TREE_VALUE (arglist);
13856 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13857 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13858 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13859 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13860 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13862 if (VECTOR_MODE_P (mode1))
13863 op1 = safe_vector_operand (op1, mode1);
13865 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13866 op1 = copy_to_mode_reg (mode1, op1);
13868 pat = GEN_FCN (icode) (op0, op1);
13874 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13877 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13878 rtx target, int do_load)
13881 tree arg0 = TREE_VALUE (arglist);
13882 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13883 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13884 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13887 || GET_MODE (target) != tmode
13888 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13889 target = gen_reg_rtx (tmode);
13891 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13894 if (VECTOR_MODE_P (mode0))
13895 op0 = safe_vector_operand (op0, mode0);
13897 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13898 op0 = copy_to_mode_reg (mode0, op0);
13901 pat = GEN_FCN (icode) (target, op0);
13908 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13909 sqrtss, rsqrtss, rcpss. */
13912 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13915 tree arg0 = TREE_VALUE (arglist);
13916 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13917 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13918 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13921 || GET_MODE (target) != tmode
13922 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13923 target = gen_reg_rtx (tmode);
13925 if (VECTOR_MODE_P (mode0))
13926 op0 = safe_vector_operand (op0, mode0);
13928 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13929 op0 = copy_to_mode_reg (mode0, op0);
13932 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13933 op1 = copy_to_mode_reg (mode0, op1);
13935 pat = GEN_FCN (icode) (target, op0, op1);
13942 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13945 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13949 tree arg0 = TREE_VALUE (arglist);
13950 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13951 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13952 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13954 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13955 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13956 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13957 enum rtx_code comparison = d->comparison;
13959 if (VECTOR_MODE_P (mode0))
13960 op0 = safe_vector_operand (op0, mode0);
13961 if (VECTOR_MODE_P (mode1))
13962 op1 = safe_vector_operand (op1, mode1);
13964 /* Swap operands if we have a comparison that isn't available in
13968 rtx tmp = gen_reg_rtx (mode1);
13969 emit_move_insn (tmp, op1);
13975 || GET_MODE (target) != tmode
13976 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13977 target = gen_reg_rtx (tmode);
13979 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13980 op0 = copy_to_mode_reg (mode0, op0);
13981 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13982 op1 = copy_to_mode_reg (mode1, op1);
13984 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13985 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13992 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13995 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13999 tree arg0 = TREE_VALUE (arglist);
14000 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14001 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14002 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14004 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14005 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14006 enum rtx_code comparison = d->comparison;
14008 if (VECTOR_MODE_P (mode0))
14009 op0 = safe_vector_operand (op0, mode0);
14010 if (VECTOR_MODE_P (mode1))
14011 op1 = safe_vector_operand (op1, mode1);
14013 /* Swap operands if we have a comparison that isn't available in
14022 target = gen_reg_rtx (SImode);
14023 emit_move_insn (target, const0_rtx);
14024 target = gen_rtx_SUBREG (QImode, target, 0);
14026 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14027 op0 = copy_to_mode_reg (mode0, op0);
14028 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14029 op1 = copy_to_mode_reg (mode1, op1);
14031 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14032 pat = GEN_FCN (d->icode) (op0, op1);
14036 emit_insn (gen_rtx_SET (VOIDmode,
14037 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14038 gen_rtx_fmt_ee (comparison, QImode,
14042 return SUBREG_REG (target);
14045 /* Expand an expression EXP that calls a built-in function,
14046 with result going to TARGET if that's convenient
14047 (and in mode MODE if that's convenient).
14048 SUBTARGET may be used as the target for computing one of EXP's operands.
14049 IGNORE is nonzero if the value is to be ignored. */
14052 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14053 enum machine_mode mode ATTRIBUTE_UNUSED,
14054 int ignore ATTRIBUTE_UNUSED)
14056 const struct builtin_description *d;
14058 enum insn_code icode;
14059 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14060 tree arglist = TREE_OPERAND (exp, 1);
14061 tree arg0, arg1, arg2;
14062 rtx op0, op1, op2, pat;
14063 enum machine_mode tmode, mode0, mode1, mode2;
14064 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14068 case IX86_BUILTIN_EMMS:
14069 emit_insn (gen_emms ());
14072 case IX86_BUILTIN_SFENCE:
14073 emit_insn (gen_sfence ());
14076 case IX86_BUILTIN_PEXTRW:
14077 case IX86_BUILTIN_PEXTRW128:
14078 icode = (fcode == IX86_BUILTIN_PEXTRW
14079 ? CODE_FOR_mmx_pextrw
14080 : CODE_FOR_sse2_pextrw);
14081 arg0 = TREE_VALUE (arglist);
14082 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14083 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14084 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14085 tmode = insn_data[icode].operand[0].mode;
14086 mode0 = insn_data[icode].operand[1].mode;
14087 mode1 = insn_data[icode].operand[2].mode;
14089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14090 op0 = copy_to_mode_reg (mode0, op0);
14091 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14093 error ("selector must be an integer constant in the range 0..%i",
14094 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14095 return gen_reg_rtx (tmode);
14098 || GET_MODE (target) != tmode
14099 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14100 target = gen_reg_rtx (tmode);
14101 pat = GEN_FCN (icode) (target, op0, op1);
14107 case IX86_BUILTIN_PINSRW:
14108 case IX86_BUILTIN_PINSRW128:
14109 icode = (fcode == IX86_BUILTIN_PINSRW
14110 ? CODE_FOR_mmx_pinsrw
14111 : CODE_FOR_sse2_pinsrw);
14112 arg0 = TREE_VALUE (arglist);
14113 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14114 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14115 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14116 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14117 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14118 tmode = insn_data[icode].operand[0].mode;
14119 mode0 = insn_data[icode].operand[1].mode;
14120 mode1 = insn_data[icode].operand[2].mode;
14121 mode2 = insn_data[icode].operand[3].mode;
14123 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14124 op0 = copy_to_mode_reg (mode0, op0);
14125 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14126 op1 = copy_to_mode_reg (mode1, op1);
14127 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14129 error ("selector must be an integer constant in the range 0..%i",
14130 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14134 || GET_MODE (target) != tmode
14135 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14136 target = gen_reg_rtx (tmode);
14137 pat = GEN_FCN (icode) (target, op0, op1, op2);
14143 case IX86_BUILTIN_MASKMOVQ:
14144 case IX86_BUILTIN_MASKMOVDQU:
14145 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14146 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14147 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14148 : CODE_FOR_sse2_maskmovdqu));
14149 /* Note the arg order is different from the operand order. */
14150 arg1 = TREE_VALUE (arglist);
14151 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14152 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14153 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14154 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14155 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14156 mode0 = insn_data[icode].operand[0].mode;
14157 mode1 = insn_data[icode].operand[1].mode;
14158 mode2 = insn_data[icode].operand[2].mode;
14160 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14161 op0 = copy_to_mode_reg (mode0, op0);
14162 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14163 op1 = copy_to_mode_reg (mode1, op1);
14164 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14165 op2 = copy_to_mode_reg (mode2, op2);
14166 pat = GEN_FCN (icode) (op0, op1, op2);
14172 case IX86_BUILTIN_SQRTSS:
14173 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14174 case IX86_BUILTIN_RSQRTSS:
14175 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14176 case IX86_BUILTIN_RCPSS:
14177 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14179 case IX86_BUILTIN_LOADAPS:
14180 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14182 case IX86_BUILTIN_LOADUPS:
14183 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14185 case IX86_BUILTIN_STOREAPS:
14186 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14188 case IX86_BUILTIN_STOREUPS:
14189 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14191 case IX86_BUILTIN_LOADSS:
14192 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14194 case IX86_BUILTIN_STORESS:
14195 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14197 case IX86_BUILTIN_LOADHPS:
14198 case IX86_BUILTIN_LOADLPS:
14199 case IX86_BUILTIN_LOADHPD:
14200 case IX86_BUILTIN_LOADLPD:
14201 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14202 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14203 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14204 : CODE_FOR_sse2_movsd);
14205 arg0 = TREE_VALUE (arglist);
14206 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14207 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14208 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14209 tmode = insn_data[icode].operand[0].mode;
14210 mode0 = insn_data[icode].operand[1].mode;
14211 mode1 = insn_data[icode].operand[2].mode;
14213 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14214 op0 = copy_to_mode_reg (mode0, op0);
14215 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14217 || GET_MODE (target) != tmode
14218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14219 target = gen_reg_rtx (tmode);
14220 pat = GEN_FCN (icode) (target, op0, op1);
14226 case IX86_BUILTIN_STOREHPS:
14227 case IX86_BUILTIN_STORELPS:
14228 case IX86_BUILTIN_STOREHPD:
14229 case IX86_BUILTIN_STORELPD:
14230 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14231 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14232 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14233 : CODE_FOR_sse2_movsd);
14234 arg0 = TREE_VALUE (arglist);
14235 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14236 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14237 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14238 mode0 = insn_data[icode].operand[1].mode;
14239 mode1 = insn_data[icode].operand[2].mode;
14241 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14242 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14243 op1 = copy_to_mode_reg (mode1, op1);
14245 pat = GEN_FCN (icode) (op0, op0, op1);
14251 case IX86_BUILTIN_MOVNTPS:
14252 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14253 case IX86_BUILTIN_MOVNTQ:
14254 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14256 case IX86_BUILTIN_LDMXCSR:
14257 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14258 target = assign_386_stack_local (SImode, 0);
14259 emit_move_insn (target, op0);
14260 emit_insn (gen_ldmxcsr (target));
14263 case IX86_BUILTIN_STMXCSR:
14264 target = assign_386_stack_local (SImode, 0);
14265 emit_insn (gen_stmxcsr (target));
14266 return copy_to_mode_reg (SImode, target);
14268 case IX86_BUILTIN_SHUFPS:
14269 case IX86_BUILTIN_SHUFPD:
14270 icode = (fcode == IX86_BUILTIN_SHUFPS
14271 ? CODE_FOR_sse_shufps
14272 : CODE_FOR_sse2_shufpd);
14273 arg0 = TREE_VALUE (arglist);
14274 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14275 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14276 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14277 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14278 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14279 tmode = insn_data[icode].operand[0].mode;
14280 mode0 = insn_data[icode].operand[1].mode;
14281 mode1 = insn_data[icode].operand[2].mode;
14282 mode2 = insn_data[icode].operand[3].mode;
14284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14285 op0 = copy_to_mode_reg (mode0, op0);
14286 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14287 op1 = copy_to_mode_reg (mode1, op1);
14288 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14290 /* @@@ better error message */
14291 error ("mask must be an immediate");
14292 return gen_reg_rtx (tmode);
14295 || GET_MODE (target) != tmode
14296 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14297 target = gen_reg_rtx (tmode);
14298 pat = GEN_FCN (icode) (target, op0, op1, op2);
14304 case IX86_BUILTIN_PSHUFW:
14305 case IX86_BUILTIN_PSHUFD:
14306 case IX86_BUILTIN_PSHUFHW:
14307 case IX86_BUILTIN_PSHUFLW:
14308 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14309 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14310 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14311 : CODE_FOR_mmx_pshufw);
14312 arg0 = TREE_VALUE (arglist);
14313 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14314 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14315 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14316 tmode = insn_data[icode].operand[0].mode;
14317 mode1 = insn_data[icode].operand[1].mode;
14318 mode2 = insn_data[icode].operand[2].mode;
14320 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14321 op0 = copy_to_mode_reg (mode1, op0);
14322 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14324 /* @@@ better error message */
14325 error ("mask must be an immediate");
14329 || GET_MODE (target) != tmode
14330 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14331 target = gen_reg_rtx (tmode);
14332 pat = GEN_FCN (icode) (target, op0, op1);
14338 case IX86_BUILTIN_PSLLDQI128:
14339 case IX86_BUILTIN_PSRLDQI128:
14340 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14341 : CODE_FOR_sse2_lshrti3);
14342 arg0 = TREE_VALUE (arglist);
14343 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14344 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14345 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14346 tmode = insn_data[icode].operand[0].mode;
14347 mode1 = insn_data[icode].operand[1].mode;
14348 mode2 = insn_data[icode].operand[2].mode;
14350 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14352 op0 = copy_to_reg (op0);
14353 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14355 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14357 error ("shift must be an immediate");
14360 target = gen_reg_rtx (V2DImode);
14361 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14367 case IX86_BUILTIN_FEMMS:
14368 emit_insn (gen_femms ());
14371 case IX86_BUILTIN_PAVGUSB:
14372 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14374 case IX86_BUILTIN_PF2ID:
14375 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14377 case IX86_BUILTIN_PFACC:
14378 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14380 case IX86_BUILTIN_PFADD:
14381 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14383 case IX86_BUILTIN_PFCMPEQ:
14384 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14386 case IX86_BUILTIN_PFCMPGE:
14387 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14389 case IX86_BUILTIN_PFCMPGT:
14390 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14392 case IX86_BUILTIN_PFMAX:
14393 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14395 case IX86_BUILTIN_PFMIN:
14396 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14398 case IX86_BUILTIN_PFMUL:
14399 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14401 case IX86_BUILTIN_PFRCP:
14402 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14404 case IX86_BUILTIN_PFRCPIT1:
14405 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14407 case IX86_BUILTIN_PFRCPIT2:
14408 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14410 case IX86_BUILTIN_PFRSQIT1:
14411 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14413 case IX86_BUILTIN_PFRSQRT:
14414 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14416 case IX86_BUILTIN_PFSUB:
14417 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14419 case IX86_BUILTIN_PFSUBR:
14420 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14422 case IX86_BUILTIN_PI2FD:
14423 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14425 case IX86_BUILTIN_PMULHRW:
14426 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14428 case IX86_BUILTIN_PF2IW:
14429 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14431 case IX86_BUILTIN_PFNACC:
14432 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14434 case IX86_BUILTIN_PFPNACC:
14435 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14437 case IX86_BUILTIN_PI2FW:
14438 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14440 case IX86_BUILTIN_PSWAPDSI:
14441 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14443 case IX86_BUILTIN_PSWAPDSF:
14444 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14446 case IX86_BUILTIN_SSE_ZERO:
14447 target = gen_reg_rtx (V4SFmode);
14448 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14451 case IX86_BUILTIN_MMX_ZERO:
14452 target = gen_reg_rtx (DImode);
14453 emit_insn (gen_mmx_clrdi (target));
14456 case IX86_BUILTIN_CLRTI:
14457 target = gen_reg_rtx (V2DImode);
14458 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14462 case IX86_BUILTIN_SQRTSD:
14463 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14464 case IX86_BUILTIN_LOADAPD:
14465 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14466 case IX86_BUILTIN_LOADUPD:
14467 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14469 case IX86_BUILTIN_STOREAPD:
14470 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14471 case IX86_BUILTIN_STOREUPD:
14472 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14474 case IX86_BUILTIN_LOADSD:
14475 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14477 case IX86_BUILTIN_STORESD:
14478 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14480 case IX86_BUILTIN_SETPD1:
14481 target = assign_386_stack_local (DFmode, 0);
14482 arg0 = TREE_VALUE (arglist);
14483 emit_move_insn (adjust_address (target, DFmode, 0),
14484 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14485 op0 = gen_reg_rtx (V2DFmode);
14486 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14487 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14490 case IX86_BUILTIN_SETPD:
14491 target = assign_386_stack_local (V2DFmode, 0);
14492 arg0 = TREE_VALUE (arglist);
14493 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14494 emit_move_insn (adjust_address (target, DFmode, 0),
14495 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14496 emit_move_insn (adjust_address (target, DFmode, 8),
14497 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14498 op0 = gen_reg_rtx (V2DFmode);
14499 emit_insn (gen_sse2_movapd (op0, target));
14502 case IX86_BUILTIN_LOADRPD:
14503 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14504 gen_reg_rtx (V2DFmode), 1);
14505 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14508 case IX86_BUILTIN_LOADPD1:
14509 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14510 gen_reg_rtx (V2DFmode), 1);
14511 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14514 case IX86_BUILTIN_STOREPD1:
14515 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14516 case IX86_BUILTIN_STORERPD:
14517 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14519 case IX86_BUILTIN_CLRPD:
14520 target = gen_reg_rtx (V2DFmode);
14521 emit_insn (gen_sse_clrv2df (target));
14524 case IX86_BUILTIN_MFENCE:
14525 emit_insn (gen_sse2_mfence ());
14527 case IX86_BUILTIN_LFENCE:
14528 emit_insn (gen_sse2_lfence ());
14531 case IX86_BUILTIN_CLFLUSH:
14532 arg0 = TREE_VALUE (arglist);
14533 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14534 icode = CODE_FOR_sse2_clflush;
14535 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14536 op0 = copy_to_mode_reg (Pmode, op0);
14538 emit_insn (gen_sse2_clflush (op0));
14541 case IX86_BUILTIN_MOVNTPD:
14542 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14543 case IX86_BUILTIN_MOVNTDQ:
14544 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14545 case IX86_BUILTIN_MOVNTI:
14546 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14548 case IX86_BUILTIN_LOADDQA:
14549 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14550 case IX86_BUILTIN_LOADDQU:
14551 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14552 case IX86_BUILTIN_LOADD:
14553 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14555 case IX86_BUILTIN_STOREDQA:
14556 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14557 case IX86_BUILTIN_STOREDQU:
14558 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14559 case IX86_BUILTIN_STORED:
14560 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14562 case IX86_BUILTIN_MONITOR:
14563 arg0 = TREE_VALUE (arglist);
14564 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14565 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14566 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14567 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14568 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14570 op0 = copy_to_mode_reg (SImode, op0);
14572 op1 = copy_to_mode_reg (SImode, op1);
14574 op2 = copy_to_mode_reg (SImode, op2);
14575 emit_insn (gen_monitor (op0, op1, op2));
14578 case IX86_BUILTIN_MWAIT:
14579 arg0 = TREE_VALUE (arglist);
14580 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14581 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14582 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14584 op0 = copy_to_mode_reg (SImode, op0);
14586 op1 = copy_to_mode_reg (SImode, op1);
14587 emit_insn (gen_mwait (op0, op1));
14590 case IX86_BUILTIN_LOADDDUP:
14591 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14593 case IX86_BUILTIN_LDDQU:
14594 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14601 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14602 if (d->code == fcode)
14604 /* Compares are treated specially. */
14605 if (d->icode == CODE_FOR_maskcmpv4sf3
14606 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14607 || d->icode == CODE_FOR_maskncmpv4sf3
14608 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14609 || d->icode == CODE_FOR_maskcmpv2df3
14610 || d->icode == CODE_FOR_vmmaskcmpv2df3
14611 || d->icode == CODE_FOR_maskncmpv2df3
14612 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14613 return ix86_expand_sse_compare (d, arglist, target);
14615 return ix86_expand_binop_builtin (d->icode, arglist, target);
14618 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14619 if (d->code == fcode)
14620 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14622 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14623 if (d->code == fcode)
14624 return ix86_expand_sse_comi (d, arglist, target);
14626 /* @@@ Should really do something sensible here. */
14630 /* Store OPERAND to the memory after reload is completed. This means
14631 that we can't easily use assign_stack_local. */
14633 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14636 if (!reload_completed)
14638 if (TARGET_RED_ZONE)
14640 result = gen_rtx_MEM (mode,
14641 gen_rtx_PLUS (Pmode,
14643 GEN_INT (-RED_ZONE_SIZE)));
14644 emit_move_insn (result, operand);
14646 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14652 operand = gen_lowpart (DImode, operand);
14656 gen_rtx_SET (VOIDmode,
14657 gen_rtx_MEM (DImode,
14658 gen_rtx_PRE_DEC (DImode,
14659 stack_pointer_rtx)),
14665 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14674 split_di (&operand, 1, operands, operands + 1);
14676 gen_rtx_SET (VOIDmode,
14677 gen_rtx_MEM (SImode,
14678 gen_rtx_PRE_DEC (Pmode,
14679 stack_pointer_rtx)),
14682 gen_rtx_SET (VOIDmode,
14683 gen_rtx_MEM (SImode,
14684 gen_rtx_PRE_DEC (Pmode,
14685 stack_pointer_rtx)),
14690 /* It is better to store HImodes as SImodes. */
14691 if (!TARGET_PARTIAL_REG_STALL)
14692 operand = gen_lowpart (SImode, operand);
14696 gen_rtx_SET (VOIDmode,
14697 gen_rtx_MEM (GET_MODE (operand),
14698 gen_rtx_PRE_DEC (SImode,
14699 stack_pointer_rtx)),
14705 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14710 /* Free operand from the memory. */
14712 ix86_free_from_memory (enum machine_mode mode)
14714 if (!TARGET_RED_ZONE)
14718 if (mode == DImode || TARGET_64BIT)
14720 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14724 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14725 to pop or add instruction if registers are available. */
14726 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14727 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14732 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14733 QImode must go into class Q_REGS.
14734 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14735 movdf to do mem-to-mem moves through integer regs. */
14737 ix86_preferred_reload_class (rtx x, enum reg_class class)
14739 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14741 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14743 /* SSE can't load any constant directly yet. */
14744 if (SSE_CLASS_P (class))
14746 /* Floats can load 0 and 1. */
14747 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14749 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14750 if (MAYBE_SSE_CLASS_P (class))
14751 return (reg_class_subset_p (class, GENERAL_REGS)
14752 ? GENERAL_REGS : FLOAT_REGS);
14756 /* General regs can load everything. */
14757 if (reg_class_subset_p (class, GENERAL_REGS))
14758 return GENERAL_REGS;
14759 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14760 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14763 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14765 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14770 /* If we are copying between general and FP registers, we need a memory
14771 location. The same is true for SSE and MMX registers.
14773 The macro can't work reliably when one of the CLASSES is class containing
14774 registers from multiple units (SSE, MMX, integer). We avoid this by never
14775 combining those units in single alternative in the machine description.
14776 Ensure that this constraint holds to avoid unexpected surprises.
14778 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14779 enforce these sanity checks. */
14781 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14782 enum machine_mode mode, int strict)
14784 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14785 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14786 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14787 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14788 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14789 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14796 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14797 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14798 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14799 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14800 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14802 /* Return the cost of moving data from a register in class CLASS1 to
14803 one in class CLASS2.
14805 It is not required that the cost always equal 2 when FROM is the same as TO;
14806 on some machines it is expensive to move between registers if they are not
14807 general registers. */
14809 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14810 enum reg_class class2)
14812 /* In case we require secondary memory, compute cost of the store followed
14813 by load. In order to avoid bad register allocation choices, we need
14814 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14816 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14820 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14821 MEMORY_MOVE_COST (mode, class1, 1));
14822 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14823 MEMORY_MOVE_COST (mode, class2, 1));
14825 /* In case of copying from general_purpose_register we may emit multiple
14826 stores followed by single load causing memory size mismatch stall.
14827 Count this as arbitrarily high cost of 20. */
14828 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14831 /* In the case of FP/MMX moves, the registers actually overlap, and we
14832 have to switch modes in order to treat them differently. */
14833 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14834 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14840 /* Moves between SSE/MMX and integer unit are expensive. */
14841 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14842 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14843 return ix86_cost->mmxsse_to_integer;
14844 if (MAYBE_FLOAT_CLASS_P (class1))
14845 return ix86_cost->fp_move;
14846 if (MAYBE_SSE_CLASS_P (class1))
14847 return ix86_cost->sse_move;
14848 if (MAYBE_MMX_CLASS_P (class1))
14849 return ix86_cost->mmx_move;
14853 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14855 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14857 /* Flags and only flags can only hold CCmode values. */
14858 if (CC_REGNO_P (regno))
14859 return GET_MODE_CLASS (mode) == MODE_CC;
14860 if (GET_MODE_CLASS (mode) == MODE_CC
14861 || GET_MODE_CLASS (mode) == MODE_RANDOM
14862 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14864 if (FP_REGNO_P (regno))
14865 return VALID_FP_MODE_P (mode);
14866 if (SSE_REGNO_P (regno))
14867 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14868 if (MMX_REGNO_P (regno))
14870 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14871 /* We handle both integer and floats in the general purpose registers.
14872 In future we should be able to handle vector modes as well. */
14873 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14875 /* Take care for QImode values - they can be in non-QI regs, but then
14876 they do cause partial register stalls. */
14877 if (regno < 4 || mode != QImode || TARGET_64BIT)
14879 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14882 /* Return the cost of moving data of mode M between a
14883 register and memory. A value of 2 is the default; this cost is
14884 relative to those in `REGISTER_MOVE_COST'.
14886 If moving between registers and memory is more expensive than
14887 between two registers, you should define this macro to express the
14890 Model also increased moving costs of QImode registers in non
14894 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14896 if (FLOAT_CLASS_P (class))
14913 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14915 if (SSE_CLASS_P (class))
14918 switch (GET_MODE_SIZE (mode))
14932 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14934 if (MMX_CLASS_P (class))
14937 switch (GET_MODE_SIZE (mode))
14948 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14950 switch (GET_MODE_SIZE (mode))
14954 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14955 : ix86_cost->movzbl_load);
14957 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14958 : ix86_cost->int_store[0] + 4);
14961 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14963 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14964 if (mode == TFmode)
14966 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14967 * (((int) GET_MODE_SIZE (mode)
14968 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14972 /* Compute a (partial) cost for rtx X. Return true if the complete
14973 cost has been computed, and false if subexpressions should be
14974 scanned. In either case, *TOTAL contains the cost result. */
14977 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14979 enum machine_mode mode = GET_MODE (x);
14987 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14989 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14991 else if (flag_pic && SYMBOLIC_CONST (x)
14993 || (!GET_CODE (x) != LABEL_REF
14994 && (GET_CODE (x) != SYMBOL_REF
14995 || !SYMBOL_REF_LOCAL_P (x)))))
15002 if (mode == VOIDmode)
15005 switch (standard_80387_constant_p (x))
15010 default: /* Other constants */
15015 /* Start with (MEM (SYMBOL_REF)), since that's where
15016 it'll probably end up. Add a penalty for size. */
15017 *total = (COSTS_N_INSNS (1)
15018 + (flag_pic != 0 && !TARGET_64BIT)
15019 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15025 /* The zero extensions is often completely free on x86_64, so make
15026 it as cheap as possible. */
15027 if (TARGET_64BIT && mode == DImode
15028 && GET_MODE (XEXP (x, 0)) == SImode)
15030 else if (TARGET_ZERO_EXTEND_WITH_AND)
15031 *total = COSTS_N_INSNS (ix86_cost->add);
15033 *total = COSTS_N_INSNS (ix86_cost->movzx);
15037 *total = COSTS_N_INSNS (ix86_cost->movsx);
15041 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15042 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15044 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15047 *total = COSTS_N_INSNS (ix86_cost->add);
15050 if ((value == 2 || value == 3)
15051 && !TARGET_DECOMPOSE_LEA
15052 && ix86_cost->lea <= ix86_cost->shift_const)
15054 *total = COSTS_N_INSNS (ix86_cost->lea);
15064 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15066 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15068 if (INTVAL (XEXP (x, 1)) > 32)
15069 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15071 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15075 if (GET_CODE (XEXP (x, 1)) == AND)
15076 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15078 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15083 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15084 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15086 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15091 if (FLOAT_MODE_P (mode))
15092 *total = COSTS_N_INSNS (ix86_cost->fmul);
15093 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15095 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15098 for (nbits = 0; value != 0; value >>= 1)
15101 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15102 + nbits * ix86_cost->mult_bit);
15106 /* This is arbitrary */
15107 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15108 + 7 * ix86_cost->mult_bit);
15116 if (FLOAT_MODE_P (mode))
15117 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15119 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15123 if (FLOAT_MODE_P (mode))
15124 *total = COSTS_N_INSNS (ix86_cost->fadd);
15125 else if (!TARGET_DECOMPOSE_LEA
15126 && GET_MODE_CLASS (mode) == MODE_INT
15127 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15129 if (GET_CODE (XEXP (x, 0)) == PLUS
15130 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15131 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15132 && CONSTANT_P (XEXP (x, 1)))
15134 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15135 if (val == 2 || val == 4 || val == 8)
15137 *total = COSTS_N_INSNS (ix86_cost->lea);
15138 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15139 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15141 *total += rtx_cost (XEXP (x, 1), outer_code);
15145 else if (GET_CODE (XEXP (x, 0)) == MULT
15146 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15148 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15149 if (val == 2 || val == 4 || val == 8)
15151 *total = COSTS_N_INSNS (ix86_cost->lea);
15152 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15153 *total += rtx_cost (XEXP (x, 1), outer_code);
15157 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15159 *total = COSTS_N_INSNS (ix86_cost->lea);
15160 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15161 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15162 *total += rtx_cost (XEXP (x, 1), outer_code);
15169 if (FLOAT_MODE_P (mode))
15171 *total = COSTS_N_INSNS (ix86_cost->fadd);
15179 if (!TARGET_64BIT && mode == DImode)
15181 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15182 + (rtx_cost (XEXP (x, 0), outer_code)
15183 << (GET_MODE (XEXP (x, 0)) != DImode))
15184 + (rtx_cost (XEXP (x, 1), outer_code)
15185 << (GET_MODE (XEXP (x, 1)) != DImode)));
15191 if (FLOAT_MODE_P (mode))
15193 *total = COSTS_N_INSNS (ix86_cost->fchs);
15199 if (!TARGET_64BIT && mode == DImode)
15200 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15202 *total = COSTS_N_INSNS (ix86_cost->add);
15206 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15211 if (FLOAT_MODE_P (mode))
15212 *total = COSTS_N_INSNS (ix86_cost->fabs);
15216 if (FLOAT_MODE_P (mode))
15217 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15221 if (XINT (x, 1) == UNSPEC_TP)
15230 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15232 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15235 fputs ("\tpushl $", asm_out_file);
15236 assemble_name (asm_out_file, XSTR (symbol, 0));
15237 fputc ('\n', asm_out_file);
15243 static int current_machopic_label_num;
15245 /* Given a symbol name and its associated stub, write out the
15246 definition of the stub. */
15249 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15251 unsigned int length;
15252 char *binder_name, *symbol_name, lazy_ptr_name[32];
15253 int label = ++current_machopic_label_num;
15255 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15256 symb = (*targetm.strip_name_encoding) (symb);
15258 length = strlen (stub);
15259 binder_name = alloca (length + 32);
15260 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15262 length = strlen (symb);
15263 symbol_name = alloca (length + 32);
15264 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15266 sprintf (lazy_ptr_name, "L%d$lz", label);
15269 machopic_picsymbol_stub_section ();
15271 machopic_symbol_stub_section ();
15273 fprintf (file, "%s:\n", stub);
15274 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15278 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15279 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15280 fprintf (file, "\tjmp %%edx\n");
15283 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15285 fprintf (file, "%s:\n", binder_name);
15289 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15290 fprintf (file, "\tpushl %%eax\n");
15293 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15295 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15297 machopic_lazy_symbol_ptr_section ();
15298 fprintf (file, "%s:\n", lazy_ptr_name);
15299 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15300 fprintf (file, "\t.long %s\n", binder_name);
15302 #endif /* TARGET_MACHO */
15304 /* Order the registers for register allocator. */
15307 x86_order_regs_for_local_alloc (void)
15312 /* First allocate the local general purpose registers. */
15313 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15314 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15315 reg_alloc_order [pos++] = i;
15317 /* Global general purpose registers. */
15318 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15319 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15320 reg_alloc_order [pos++] = i;
15322 /* x87 registers come first in case we are doing FP math
15324 if (!TARGET_SSE_MATH)
15325 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15326 reg_alloc_order [pos++] = i;
15328 /* SSE registers. */
15329 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15330 reg_alloc_order [pos++] = i;
15331 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15332 reg_alloc_order [pos++] = i;
15334 /* x87 registers. */
15335 if (TARGET_SSE_MATH)
15336 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15337 reg_alloc_order [pos++] = i;
15339 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15340 reg_alloc_order [pos++] = i;
15342 /* Initialize the rest of array as we do not allocate some registers
15344 while (pos < FIRST_PSEUDO_REGISTER)
15345 reg_alloc_order [pos++] = 0;
15348 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15349 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15352 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15353 struct attribute_spec.handler. */
15355 ix86_handle_struct_attribute (tree *node, tree name,
15356 tree args ATTRIBUTE_UNUSED,
15357 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15360 if (DECL_P (*node))
15362 if (TREE_CODE (*node) == TYPE_DECL)
15363 type = &TREE_TYPE (*node);
15368 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15369 || TREE_CODE (*type) == UNION_TYPE)))
15371 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15372 *no_add_attrs = true;
15375 else if ((is_attribute_p ("ms_struct", name)
15376 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15377 || ((is_attribute_p ("gcc_struct", name)
15378 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15380 warning ("`%s' incompatible attribute ignored",
15381 IDENTIFIER_POINTER (name));
15382 *no_add_attrs = true;
15389 ix86_ms_bitfield_layout_p (tree record_type)
15391 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15392 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15393 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15396 /* Returns an expression indicating where the this parameter is
15397 located on entry to the FUNCTION. */
15400 x86_this_parameter (tree function)
15402 tree type = TREE_TYPE (function);
15406 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15407 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15410 if (ix86_function_regparm (type, function) > 0)
15414 parm = TYPE_ARG_TYPES (type);
15415 /* Figure out whether or not the function has a variable number of
15417 for (; parm; parm = TREE_CHAIN (parm))
15418 if (TREE_VALUE (parm) == void_type_node)
15420 /* If not, the this parameter is in the first argument. */
15424 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15426 return gen_rtx_REG (SImode, regno);
15430 if (aggregate_value_p (TREE_TYPE (type), type))
15431 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15433 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15436 /* Determine whether x86_output_mi_thunk can succeed. */
15439 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15440 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15441 HOST_WIDE_INT vcall_offset, tree function)
15443 /* 64-bit can handle anything. */
15447 /* For 32-bit, everything's fine if we have one free register. */
15448 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15451 /* Need a free register for vcall_offset. */
15455 /* Need a free register for GOT references. */
15456 if (flag_pic && !(*targetm.binds_local_p) (function))
15459 /* Otherwise ok. */
15463 /* Output the assembler code for a thunk function. THUNK_DECL is the
15464 declaration for the thunk function itself, FUNCTION is the decl for
15465 the target function. DELTA is an immediate constant offset to be
15466 added to THIS. If VCALL_OFFSET is nonzero, the word at
15467 *(*this + vcall_offset) should be added to THIS. */
15470 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15471 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15472 HOST_WIDE_INT vcall_offset, tree function)
15475 rtx this = x86_this_parameter (function);
15478 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15479 pull it in now and let DELTA benefit. */
15482 else if (vcall_offset)
15484 /* Put the this parameter into %eax. */
15486 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15487 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15490 this_reg = NULL_RTX;
15492 /* Adjust the this parameter by a fixed constant. */
15495 xops[0] = GEN_INT (delta);
15496 xops[1] = this_reg ? this_reg : this;
15499 if (!x86_64_general_operand (xops[0], DImode))
15501 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15503 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15507 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15510 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15513 /* Adjust the this parameter by a value stored in the vtable. */
15517 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15520 int tmp_regno = 2 /* ECX */;
15521 if (lookup_attribute ("fastcall",
15522 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15523 tmp_regno = 0 /* EAX */;
15524 tmp = gen_rtx_REG (SImode, tmp_regno);
15527 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15530 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15532 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15534 /* Adjust the this parameter. */
15535 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15536 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15538 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15539 xops[0] = GEN_INT (vcall_offset);
15541 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15542 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15544 xops[1] = this_reg;
15546 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15548 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15551 /* If necessary, drop THIS back to its stack slot. */
15552 if (this_reg && this_reg != this)
15554 xops[0] = this_reg;
15556 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15559 xops[0] = XEXP (DECL_RTL (function), 0);
15562 if (!flag_pic || (*targetm.binds_local_p) (function))
15563 output_asm_insn ("jmp\t%P0", xops);
15566 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15567 tmp = gen_rtx_CONST (Pmode, tmp);
15568 tmp = gen_rtx_MEM (QImode, tmp);
15570 output_asm_insn ("jmp\t%A0", xops);
15575 if (!flag_pic || (*targetm.binds_local_p) (function))
15576 output_asm_insn ("jmp\t%P0", xops);
15581 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15582 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15583 tmp = gen_rtx_MEM (QImode, tmp);
15585 output_asm_insn ("jmp\t%0", xops);
15588 #endif /* TARGET_MACHO */
15590 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15591 output_set_got (tmp);
15594 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15595 output_asm_insn ("jmp\t{*}%1", xops);
15601 x86_file_start (void)
15603 default_file_start ();
15604 if (X86_FILE_START_VERSION_DIRECTIVE)
15605 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15606 if (X86_FILE_START_FLTUSED)
15607 fputs ("\t.global\t__fltused\n", asm_out_file);
15608 if (ix86_asm_dialect == ASM_INTEL)
15609 fputs ("\t.intel_syntax\n", asm_out_file);
15613 x86_field_alignment (tree field, int computed)
15615 enum machine_mode mode;
15616 tree type = TREE_TYPE (field);
15618 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15620 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15621 ? get_inner_array_type (type) : type);
15622 if (mode == DFmode || mode == DCmode
15623 || GET_MODE_CLASS (mode) == MODE_INT
15624 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15625 return MIN (32, computed);
15629 /* Output assembler code to FILE to increment profiler label # LABELNO
15630 for profiling a function entry. */
15632 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15637 #ifndef NO_PROFILE_COUNTERS
15638 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15640 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15644 #ifndef NO_PROFILE_COUNTERS
15645 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15647 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15651 #ifndef NO_PROFILE_COUNTERS
15652 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15653 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15655 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15659 #ifndef NO_PROFILE_COUNTERS
15660 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15661 PROFILE_COUNT_REGISTER);
15663 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15667 /* We don't have exact information about the insn sizes, but we may assume
15668 quite safely that we are informed about all 1 byte insns and memory
15669 address sizes. This is enough to eliminate unnecessary padding in
15673 min_insn_size (rtx insn)
15677 if (!INSN_P (insn) || !active_insn_p (insn))
15680 /* Discard alignments we've emit and jump instructions. */
15681 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15682 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15684 if (GET_CODE (insn) == JUMP_INSN
15685 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15686 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15689 /* Important case - calls are always 5 bytes.
15690 It is common to have many calls in the row. */
15691 if (GET_CODE (insn) == CALL_INSN
15692 && symbolic_reference_mentioned_p (PATTERN (insn))
15693 && !SIBLING_CALL_P (insn))
15695 if (get_attr_length (insn) <= 1)
15698 /* For normal instructions we may rely on the sizes of addresses
15699 and the presence of symbol to require 4 bytes of encoding.
15700 This is not the case for jumps where references are PC relative. */
15701 if (GET_CODE (insn) != JUMP_INSN)
15703 l = get_attr_length_address (insn);
15704 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15713 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15717 k8_avoid_jump_misspredicts (void)
15719 rtx insn, start = get_insns ();
15720 int nbytes = 0, njumps = 0;
15723 /* Look for all minimal intervals of instructions containing 4 jumps.
15724 The intervals are bounded by START and INSN. NBYTES is the total
15725 size of instructions in the interval including INSN and not including
15726 START. When the NBYTES is smaller than 16 bytes, it is possible
15727 that the end of START and INSN ends up in the same 16byte page.
15729 The smallest offset in the page INSN can start is the case where START
15730 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15731 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15733 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15736 nbytes += min_insn_size (insn);
15738 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15739 INSN_UID (insn), min_insn_size (insn));
15740 if ((GET_CODE (insn) == JUMP_INSN
15741 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15742 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15743 || GET_CODE (insn) == CALL_INSN)
15750 start = NEXT_INSN (start);
15751 if ((GET_CODE (start) == JUMP_INSN
15752 && GET_CODE (PATTERN (start)) != ADDR_VEC
15753 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15754 || GET_CODE (start) == CALL_INSN)
15755 njumps--, isjump = 1;
15758 nbytes -= min_insn_size (start);
15763 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15764 INSN_UID (start), INSN_UID (insn), nbytes);
15766 if (njumps == 3 && isjump && nbytes < 16)
15768 int padsize = 15 - nbytes + min_insn_size (insn);
15771 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15772 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15777 /* Implement machine specific optimizations.
15778 At the moment we implement single transformation: AMD Athlon works faster
15779 when RET is not destination of conditional jump or directly preceded
15780 by other jump instruction. We avoid the penalty by inserting NOP just
15781 before the RET instructions in such cases. */
15787 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15789 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15791 basic_block bb = e->src;
15792 rtx ret = BB_END (bb);
15794 bool replace = false;
15796 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15797 || !maybe_hot_bb_p (bb))
15799 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15800 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15802 if (prev && GET_CODE (prev) == CODE_LABEL)
15805 for (e = bb->pred; e; e = e->pred_next)
15806 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15807 && !(e->flags & EDGE_FALLTHRU))
15812 prev = prev_active_insn (ret);
15814 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15815 || GET_CODE (prev) == CALL_INSN))
15817 /* Empty functions get branch mispredict even when the jump destination
15818 is not visible to us. */
15819 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15824 emit_insn_before (gen_return_internal_long (), ret);
15828 k8_avoid_jump_misspredicts ();
15831 /* Return nonzero when QImode register that must be represented via REX prefix
15834 x86_extended_QIreg_mentioned_p (rtx insn)
15837 extract_insn_cached (insn);
15838 for (i = 0; i < recog_data.n_operands; i++)
15839 if (REG_P (recog_data.operand[i])
15840 && REGNO (recog_data.operand[i]) >= 4)
15845 /* Return nonzero when P points to register encoded via REX prefix.
15846 Called via for_each_rtx. */
15848 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15850 unsigned int regno;
15853 regno = REGNO (*p);
15854 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15857 /* Return true when INSN mentions register that must be encoded using REX
15860 x86_extended_reg_mentioned_p (rtx insn)
15862 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15865 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15866 optabs would emit if we didn't have TFmode patterns. */
15869 x86_emit_floatuns (rtx operands[2])
15871 rtx neglab, donelab, i0, i1, f0, in, out;
15872 enum machine_mode mode, inmode;
15874 inmode = GET_MODE (operands[1]);
15875 if (inmode != SImode
15876 && inmode != DImode)
15880 in = force_reg (inmode, operands[1]);
15881 mode = GET_MODE (out);
15882 neglab = gen_label_rtx ();
15883 donelab = gen_label_rtx ();
15884 i1 = gen_reg_rtx (Pmode);
15885 f0 = gen_reg_rtx (mode);
15887 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15889 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15890 emit_jump_insn (gen_jump (donelab));
15893 emit_label (neglab);
15895 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15896 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15897 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15898 expand_float (f0, i0, 0);
15899 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15901 emit_label (donelab);
15904 /* Return if we do not know how to pass TYPE solely in registers. */
15906 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15908 if (default_must_pass_in_stack (mode, type))
15910 return (!TARGET_64BIT && type && mode == TImode);
15913 /* Initialize vector TARGET via VALS. */
15915 ix86_expand_vector_init (rtx target, rtx vals)
15917 enum machine_mode mode = GET_MODE (target);
15918 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15919 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15922 for (i = n_elts - 1; i >= 0; i--)
15923 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15924 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15927 /* Few special cases first...
15928 ... constants are best loaded from constant pool. */
15931 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15935 /* ... values where only first field is non-constant are best loaded
15936 from the pool and overwriten via move later. */
15939 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15940 GET_MODE_INNER (mode), 0);
15942 op = force_reg (mode, op);
15943 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15944 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15945 switch (GET_MODE (target))
15948 emit_insn (gen_sse2_movsd (target, target, op));
15951 emit_insn (gen_sse_movss (target, target, op));
15959 /* And the busy sequence doing rotations. */
15960 switch (GET_MODE (target))
15965 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15967 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15969 vecop0 = force_reg (V2DFmode, vecop0);
15970 vecop1 = force_reg (V2DFmode, vecop1);
15971 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15977 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15979 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15981 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15983 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15984 rtx tmp1 = gen_reg_rtx (V4SFmode);
15985 rtx tmp2 = gen_reg_rtx (V4SFmode);
15987 vecop0 = force_reg (V4SFmode, vecop0);
15988 vecop1 = force_reg (V4SFmode, vecop1);
15989 vecop2 = force_reg (V4SFmode, vecop2);
15990 vecop3 = force_reg (V4SFmode, vecop3);
15991 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15992 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15993 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16001 #include "gt-i386.h"