1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
786 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
787 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
788 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name PARAMS ((void));
791 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
792 static rtx maybe_get_pool_constant PARAMS ((rtx));
793 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
794 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
796 static rtx get_thread_pointer PARAMS ((int));
797 static rtx legitimize_tls_address PARAMS ((rtx, enum tls_model, int));
798 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
799 static rtx gen_push PARAMS ((rtx));
800 static int memory_address_length PARAMS ((rtx addr));
801 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
802 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
803 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
804 static void ix86_dump_ppro_packet PARAMS ((FILE *));
805 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
806 static struct machine_function * ix86_init_machine_status PARAMS ((void));
807 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
808 static int ix86_nsaved_regs PARAMS ((void));
809 static void ix86_emit_save_regs PARAMS ((void));
810 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
811 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
812 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
813 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
814 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
815 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
816 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
817 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
818 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
819 static int ix86_issue_rate PARAMS ((void));
820 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
821 static void ix86_sched_init PARAMS ((FILE *, int, int));
822 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
823 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
824 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
825 static int ia32_multipass_dfa_lookahead PARAMS ((void));
826 static void ix86_init_mmx_sse_builtins PARAMS ((void));
827 static rtx x86_this_parameter PARAMS ((tree));
828 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree));
830 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static void x86_file_start PARAMS ((void));
833 static void ix86_reorg PARAMS ((void));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
838 rtx base, index, disp;
840 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
844 static int ix86_address_cost PARAMS ((rtx));
845 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static rtx ix86_delegitimize_address PARAMS ((rtx));
848 struct builtin_description;
849 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
851 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
853 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
854 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
855 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
856 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
857 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
858 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
859 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
863 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
865 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
866 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
867 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
868 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
869 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
870 static int ix86_save_reg PARAMS ((unsigned int, int));
871 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
872 static int ix86_comp_type_attributes PARAMS ((tree, tree));
873 static int ix86_fntype_regparm PARAMS ((tree));
874 const struct attribute_spec ix86_attribute_table[];
875 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
876 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
877 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
878 static int ix86_value_regno PARAMS ((enum machine_mode));
879 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
880 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
881 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
883 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
884 static int min_insn_size PARAMS ((rtx));
885 static void k8_avoid_jump_misspredicts PARAMS ((void));
887 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
888 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
891 /* Register class used for passing given 64bit part of the argument.
892 These represent classes as documented by the PS ABI, with the exception
893 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
894 use SF or DFmode move instead of DImode to avoid reformatting penalties.
896 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
897 whenever possible (upper half does contain padding).
899 enum x86_64_reg_class
902 X86_64_INTEGER_CLASS,
903 X86_64_INTEGERSI_CLASS,
912 static const char * const x86_64_reg_class_name[] =
913 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
915 #define MAX_CLASSES 4
916 static int classify_argument PARAMS ((enum machine_mode, tree,
917 enum x86_64_reg_class [MAX_CLASSES],
919 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
921 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
923 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
924 enum x86_64_reg_class));
926 /* Table of constants used by fldpi, fldln2, etc... */
927 static REAL_VALUE_TYPE ext_80387_constants_table [5];
928 static bool ext_80387_constants_init = 0;
929 static void init_ext_80387_constants PARAMS ((void));
931 /* Initialize the GCC target structure. */
932 #undef TARGET_ATTRIBUTE_TABLE
933 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
934 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
935 # undef TARGET_MERGE_DECL_ATTRIBUTES
936 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
939 #undef TARGET_COMP_TYPE_ATTRIBUTES
940 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
942 #undef TARGET_INIT_BUILTINS
943 #define TARGET_INIT_BUILTINS ix86_init_builtins
945 #undef TARGET_EXPAND_BUILTIN
946 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
948 #undef TARGET_ASM_FUNCTION_EPILOGUE
949 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
951 #undef TARGET_ASM_OPEN_PAREN
952 #define TARGET_ASM_OPEN_PAREN ""
953 #undef TARGET_ASM_CLOSE_PAREN
954 #define TARGET_ASM_CLOSE_PAREN ""
956 #undef TARGET_ASM_ALIGNED_HI_OP
957 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
958 #undef TARGET_ASM_ALIGNED_SI_OP
959 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
961 #undef TARGET_ASM_ALIGNED_DI_OP
962 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
965 #undef TARGET_ASM_UNALIGNED_HI_OP
966 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
967 #undef TARGET_ASM_UNALIGNED_SI_OP
968 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
969 #undef TARGET_ASM_UNALIGNED_DI_OP
970 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
972 #undef TARGET_SCHED_ADJUST_COST
973 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
974 #undef TARGET_SCHED_ISSUE_RATE
975 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
976 #undef TARGET_SCHED_VARIABLE_ISSUE
977 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
978 #undef TARGET_SCHED_INIT
979 #define TARGET_SCHED_INIT ix86_sched_init
980 #undef TARGET_SCHED_REORDER
981 #define TARGET_SCHED_REORDER ix86_sched_reorder
982 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
983 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
984 ia32_use_dfa_pipeline_interface
985 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
986 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
987 ia32_multipass_dfa_lookahead
989 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
990 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
993 #undef TARGET_HAVE_TLS
994 #define TARGET_HAVE_TLS true
996 #undef TARGET_CANNOT_FORCE_CONST_MEM
997 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
999 #undef TARGET_DELEGITIMIZE_ADDRESS
1000 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1002 #undef TARGET_MS_BITFIELD_LAYOUT_P
1003 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1005 #undef TARGET_ASM_OUTPUT_MI_THUNK
1006 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1007 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1008 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1010 #undef TARGET_ASM_FILE_START
1011 #define TARGET_ASM_FILE_START x86_file_start
1013 #undef TARGET_RTX_COSTS
1014 #define TARGET_RTX_COSTS ix86_rtx_costs
1015 #undef TARGET_ADDRESS_COST
1016 #define TARGET_ADDRESS_COST ix86_address_cost
1018 #undef TARGET_MACHINE_DEPENDENT_REORG
1019 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1021 struct gcc_target targetm = TARGET_INITIALIZER;
1023 /* The svr4 ABI for the i386 says that records and unions are returned
1025 #ifndef DEFAULT_PCC_STRUCT_RETURN
1026 #define DEFAULT_PCC_STRUCT_RETURN 1
1029 /* Sometimes certain combinations of command options do not make
1030 sense on a particular target machine. You can define a macro
1031 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1032 defined, is executed once just after all the command options have
1035 Don't use this macro to turn on various extra optimizations for
1036 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1042 /* Comes from final.c -- no real reason to change it. */
1043 #define MAX_CODE_ALIGN 16
1047 const struct processor_costs *cost; /* Processor costs */
1048 const int target_enable; /* Target flags to enable. */
1049 const int target_disable; /* Target flags to disable. */
1050 const int align_loop; /* Default alignments. */
1051 const int align_loop_max_skip;
1052 const int align_jump;
1053 const int align_jump_max_skip;
1054 const int align_func;
1056 const processor_target_table[PROCESSOR_max] =
1058 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1059 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1060 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1061 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1062 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1063 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1064 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1065 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1068 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1071 const char *const name; /* processor name or nickname. */
1072 const enum processor_type processor;
1073 const enum pta_flags
1078 PTA_PREFETCH_SSE = 8,
1084 const processor_alias_table[] =
1086 {"i386", PROCESSOR_I386, 0},
1087 {"i486", PROCESSOR_I486, 0},
1088 {"i586", PROCESSOR_PENTIUM, 0},
1089 {"pentium", PROCESSOR_PENTIUM, 0},
1090 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1091 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1092 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1093 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1094 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1095 {"i686", PROCESSOR_PENTIUMPRO, 0},
1096 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1097 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1098 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1099 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1100 PTA_MMX | PTA_PREFETCH_SSE},
1101 {"k6", PROCESSOR_K6, PTA_MMX},
1102 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1103 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1104 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1106 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1107 | PTA_3DNOW | PTA_3DNOW_A},
1108 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1111 | PTA_3DNOW_A | PTA_SSE},
1112 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1113 | PTA_3DNOW_A | PTA_SSE},
1114 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1115 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1118 int const pta_size = ARRAY_SIZE (processor_alias_table);
1120 /* By default our XFmode is the 80-bit extended format. If we have
1121 use TFmode instead, it's also the 80-bit format, but with padding. */
1122 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1123 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1125 /* Set the default values for switches whose default depends on TARGET_64BIT
1126 in case they weren't overwritten by command line options. */
1129 if (flag_omit_frame_pointer == 2)
1130 flag_omit_frame_pointer = 1;
1131 if (flag_asynchronous_unwind_tables == 2)
1132 flag_asynchronous_unwind_tables = 1;
1133 if (flag_pcc_struct_return == 2)
1134 flag_pcc_struct_return = 0;
1138 if (flag_omit_frame_pointer == 2)
1139 flag_omit_frame_pointer = 0;
1140 if (flag_asynchronous_unwind_tables == 2)
1141 flag_asynchronous_unwind_tables = 0;
1142 if (flag_pcc_struct_return == 2)
1143 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1146 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1147 SUBTARGET_OVERRIDE_OPTIONS;
1150 if (!ix86_tune_string && ix86_arch_string)
1151 ix86_tune_string = ix86_arch_string;
1152 if (!ix86_tune_string)
1153 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1154 if (!ix86_arch_string)
1155 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1157 if (ix86_cmodel_string != 0)
1159 if (!strcmp (ix86_cmodel_string, "small"))
1160 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1162 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1163 else if (!strcmp (ix86_cmodel_string, "32"))
1164 ix86_cmodel = CM_32;
1165 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1166 ix86_cmodel = CM_KERNEL;
1167 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1168 ix86_cmodel = CM_MEDIUM;
1169 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1170 ix86_cmodel = CM_LARGE;
1172 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1176 ix86_cmodel = CM_32;
1178 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1180 if (ix86_asm_string != 0)
1182 if (!strcmp (ix86_asm_string, "intel"))
1183 ix86_asm_dialect = ASM_INTEL;
1184 else if (!strcmp (ix86_asm_string, "att"))
1185 ix86_asm_dialect = ASM_ATT;
1187 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1189 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1190 error ("code model `%s' not supported in the %s bit mode",
1191 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1192 if (ix86_cmodel == CM_LARGE)
1193 sorry ("code model `large' not supported yet");
1194 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1195 sorry ("%i-bit mode not compiled in",
1196 (target_flags & MASK_64BIT) ? 64 : 32);
1198 for (i = 0; i < pta_size; i++)
1199 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1201 ix86_arch = processor_alias_table[i].processor;
1202 /* Default cpu tuning to the architecture. */
1203 ix86_tune = ix86_arch;
1204 if (processor_alias_table[i].flags & PTA_MMX
1205 && !(target_flags_explicit & MASK_MMX))
1206 target_flags |= MASK_MMX;
1207 if (processor_alias_table[i].flags & PTA_3DNOW
1208 && !(target_flags_explicit & MASK_3DNOW))
1209 target_flags |= MASK_3DNOW;
1210 if (processor_alias_table[i].flags & PTA_3DNOW_A
1211 && !(target_flags_explicit & MASK_3DNOW_A))
1212 target_flags |= MASK_3DNOW_A;
1213 if (processor_alias_table[i].flags & PTA_SSE
1214 && !(target_flags_explicit & MASK_SSE))
1215 target_flags |= MASK_SSE;
1216 if (processor_alias_table[i].flags & PTA_SSE2
1217 && !(target_flags_explicit & MASK_SSE2))
1218 target_flags |= MASK_SSE2;
1219 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1220 x86_prefetch_sse = true;
1221 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1222 error ("CPU you selected does not support x86-64 instruction set");
1227 error ("bad value (%s) for -march= switch", ix86_arch_string);
1229 for (i = 0; i < pta_size; i++)
1230 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1232 ix86_tune = processor_alias_table[i].processor;
1233 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1234 error ("CPU you selected does not support x86-64 instruction set");
1237 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1238 x86_prefetch_sse = true;
1240 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1243 ix86_cost = &size_cost;
1245 ix86_cost = processor_target_table[ix86_tune].cost;
1246 target_flags |= processor_target_table[ix86_tune].target_enable;
1247 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1249 /* Arrange to set up i386_stack_locals for all functions. */
1250 init_machine_status = ix86_init_machine_status;
1252 /* Validate -mregparm= value. */
1253 if (ix86_regparm_string)
1255 i = atoi (ix86_regparm_string);
1256 if (i < 0 || i > REGPARM_MAX)
1257 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1263 ix86_regparm = REGPARM_MAX;
1265 /* If the user has provided any of the -malign-* options,
1266 warn and use that value only if -falign-* is not set.
1267 Remove this code in GCC 3.2 or later. */
1268 if (ix86_align_loops_string)
1270 warning ("-malign-loops is obsolete, use -falign-loops");
1271 if (align_loops == 0)
1273 i = atoi (ix86_align_loops_string);
1274 if (i < 0 || i > MAX_CODE_ALIGN)
1275 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1277 align_loops = 1 << i;
1281 if (ix86_align_jumps_string)
1283 warning ("-malign-jumps is obsolete, use -falign-jumps");
1284 if (align_jumps == 0)
1286 i = atoi (ix86_align_jumps_string);
1287 if (i < 0 || i > MAX_CODE_ALIGN)
1288 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1290 align_jumps = 1 << i;
1294 if (ix86_align_funcs_string)
1296 warning ("-malign-functions is obsolete, use -falign-functions");
1297 if (align_functions == 0)
1299 i = atoi (ix86_align_funcs_string);
1300 if (i < 0 || i > MAX_CODE_ALIGN)
1301 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1303 align_functions = 1 << i;
1307 /* Default align_* from the processor table. */
1308 if (align_loops == 0)
1310 align_loops = processor_target_table[ix86_tune].align_loop;
1311 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1313 if (align_jumps == 0)
1315 align_jumps = processor_target_table[ix86_tune].align_jump;
1316 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1318 if (align_functions == 0)
1320 align_functions = processor_target_table[ix86_tune].align_func;
1323 /* Validate -mpreferred-stack-boundary= value, or provide default.
1324 The default of 128 bits is for Pentium III's SSE __m128, but we
1325 don't want additional code to keep the stack aligned when
1326 optimizing for code size. */
1327 ix86_preferred_stack_boundary = (optimize_size
1328 ? TARGET_64BIT ? 128 : 32
1330 if (ix86_preferred_stack_boundary_string)
1332 i = atoi (ix86_preferred_stack_boundary_string);
1333 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1334 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1335 TARGET_64BIT ? 4 : 2);
1337 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1340 /* Validate -mbranch-cost= value, or provide default. */
1341 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1342 if (ix86_branch_cost_string)
1344 i = atoi (ix86_branch_cost_string);
1346 error ("-mbranch-cost=%d is not between 0 and 5", i);
1348 ix86_branch_cost = i;
1351 if (ix86_tls_dialect_string)
1353 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1354 ix86_tls_dialect = TLS_DIALECT_GNU;
1355 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1356 ix86_tls_dialect = TLS_DIALECT_SUN;
1358 error ("bad value (%s) for -mtls-dialect= switch",
1359 ix86_tls_dialect_string);
1362 /* Keep nonleaf frame pointers. */
1363 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1364 flag_omit_frame_pointer = 1;
1366 /* If we're doing fast math, we don't care about comparison order
1367 wrt NaNs. This lets us use a shorter comparison sequence. */
1368 if (flag_unsafe_math_optimizations)
1369 target_flags &= ~MASK_IEEE_FP;
1371 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1372 since the insns won't need emulation. */
1373 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1374 target_flags &= ~MASK_NO_FANCY_MATH_387;
1378 if (TARGET_ALIGN_DOUBLE)
1379 error ("-malign-double makes no sense in the 64bit mode");
1381 error ("-mrtd calling convention not supported in the 64bit mode");
1382 /* Enable by default the SSE and MMX builtins. */
1383 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1384 ix86_fpmath = FPMATH_SSE;
1388 ix86_fpmath = FPMATH_387;
1389 /* i386 ABI does not specify red zone. It still makes sense to use it
1390 when programmer takes care to stack from being destroyed. */
1391 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1392 target_flags |= MASK_NO_RED_ZONE;
1395 if (ix86_fpmath_string != 0)
1397 if (! strcmp (ix86_fpmath_string, "387"))
1398 ix86_fpmath = FPMATH_387;
1399 else if (! strcmp (ix86_fpmath_string, "sse"))
1403 warning ("SSE instruction set disabled, using 387 arithmetics");
1404 ix86_fpmath = FPMATH_387;
1407 ix86_fpmath = FPMATH_SSE;
1409 else if (! strcmp (ix86_fpmath_string, "387,sse")
1410 || ! strcmp (ix86_fpmath_string, "sse,387"))
1414 warning ("SSE instruction set disabled, using 387 arithmetics");
1415 ix86_fpmath = FPMATH_387;
1417 else if (!TARGET_80387)
1419 warning ("387 instruction set disabled, using SSE arithmetics");
1420 ix86_fpmath = FPMATH_SSE;
1423 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1426 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1429 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1433 target_flags |= MASK_MMX;
1434 x86_prefetch_sse = true;
1437 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1440 target_flags |= MASK_MMX;
1441 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1442 extensions it adds. */
1443 if (x86_3dnow_a & (1 << ix86_arch))
1444 target_flags |= MASK_3DNOW_A;
1446 if ((x86_accumulate_outgoing_args & TUNEMASK)
1447 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1449 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1451 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1454 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1455 p = strchr (internal_label_prefix, 'X');
1456 internal_label_prefix_len = p - internal_label_prefix;
1462 optimization_options (level, size)
1464 int size ATTRIBUTE_UNUSED;
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
1468 #ifdef INSN_SCHEDULING
1470 flag_schedule_insns = 0;
1473 /* The default values of these switches depend on the TARGET_64BIT
1474 that is not known at this moment. Mark these values with 2 and
1475 let user the to override these. In case there is no command line option
1476 specifying them, we will set the defaults in override_options. */
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
1483 /* Table of valid machine attributes. */
1484 const struct attribute_spec ix86_attribute_table[] =
1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1495 /* Regparm attribute specifies how many integer arguments are to be
1496 passed in registers. */
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1505 { NULL, 0, 0, false, false, false, NULL }
1508 /* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call. */
1513 ix86_function_ok_for_sibcall (decl, exp)
1517 /* If we are generating position-independent code, we cannot sibcall
1518 optimize any indirect call, or a direct call to a global function,
1519 as the PLT requires %ebx be live. */
1520 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1523 /* If we are returning floats on the 80387 register stack, we cannot
1524 make a sibcall from a function that doesn't return a float to a
1525 function that does or, conversely, from a function that does return
1526 a float to a function that doesn't; the necessary stack adjustment
1527 would not be executed. */
1528 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1529 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1532 /* If this call is indirect, we'll need to be able to use a call-clobbered
1533 register for the address of the target function. Make sure that all
1534 such registers are not used for passing parameters. */
1535 if (!decl && !TARGET_64BIT)
1537 int regparm = ix86_regparm;
1540 /* We're looking at the CALL_EXPR, we need the type of the function. */
1541 type = TREE_OPERAND (exp, 0); /* pointer expression */
1542 type = TREE_TYPE (type); /* pointer type */
1543 type = TREE_TYPE (type); /* function type */
1545 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1547 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1551 /* ??? Need to count the actual number of registers to be used,
1552 not the possible number of registers. Fix later. */
1557 /* Otherwise okay. That also includes certain types of indirect calls. */
1561 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1562 arguments as in struct attribute_spec.handler. */
1564 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1567 tree args ATTRIBUTE_UNUSED;
1568 int flags ATTRIBUTE_UNUSED;
1571 if (TREE_CODE (*node) != FUNCTION_TYPE
1572 && TREE_CODE (*node) != METHOD_TYPE
1573 && TREE_CODE (*node) != FIELD_DECL
1574 && TREE_CODE (*node) != TYPE_DECL)
1576 warning ("`%s' attribute only applies to functions",
1577 IDENTIFIER_POINTER (name));
1578 *no_add_attrs = true;
1582 if (is_attribute_p ("fastcall", name))
1584 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1586 error ("fastcall and stdcall attributes are not compatible");
1588 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1590 error ("fastcall and regparm attributes are not compatible");
1593 else if (is_attribute_p ("stdcall", name))
1595 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1597 error ("fastcall and stdcall attributes are not compatible");
1604 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1605 *no_add_attrs = true;
1611 /* Handle a "regparm" attribute;
1612 arguments as in struct attribute_spec.handler. */
1614 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1618 int flags ATTRIBUTE_UNUSED;
1621 if (TREE_CODE (*node) != FUNCTION_TYPE
1622 && TREE_CODE (*node) != METHOD_TYPE
1623 && TREE_CODE (*node) != FIELD_DECL
1624 && TREE_CODE (*node) != TYPE_DECL)
1626 warning ("`%s' attribute only applies to functions",
1627 IDENTIFIER_POINTER (name));
1628 *no_add_attrs = true;
1634 cst = TREE_VALUE (args);
1635 if (TREE_CODE (cst) != INTEGER_CST)
1637 warning ("`%s' attribute requires an integer constant argument",
1638 IDENTIFIER_POINTER (name));
1639 *no_add_attrs = true;
1641 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1643 warning ("argument to `%s' attribute larger than %d",
1644 IDENTIFIER_POINTER (name), REGPARM_MAX);
1645 *no_add_attrs = true;
1648 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1650 error ("fastcall and regparm attributes are not compatible");
1657 /* Return 0 if the attributes for two types are incompatible, 1 if they
1658 are compatible, and 2 if they are nearly compatible (which causes a
1659 warning to be generated). */
1662 ix86_comp_type_attributes (type1, type2)
1666 /* Check for mismatch of non-default calling convention. */
1667 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1669 if (TREE_CODE (type1) != FUNCTION_TYPE)
1672 /* Check for mismatched fastcall types */
1673 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1674 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1677 /* Check for mismatched return types (cdecl vs stdcall). */
1678 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1679 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1684 /* Return the regparm value for a fuctio with the indicated TYPE. */
1687 ix86_fntype_regparm (type)
1692 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1694 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1696 return ix86_regparm;
1699 /* Value is the number of bytes of arguments automatically
1700 popped when returning from a subroutine call.
1701 FUNDECL is the declaration node of the function (as a tree),
1702 FUNTYPE is the data type of the function (as a tree),
1703 or for a library call it is an identifier node for the subroutine name.
1704 SIZE is the number of bytes of arguments passed on the stack.
1706 On the 80386, the RTD insn may be used to pop them if the number
1707 of args is fixed, but if the number is variable then the caller
1708 must pop them all. RTD can't be used for library calls now
1709 because the library is compiled with the Unix compiler.
1710 Use of RTD is a selectable option, since it is incompatible with
1711 standard Unix calling sequences. If the option is not selected,
1712 the caller must always pop the args.
1714 The attribute stdcall is equivalent to RTD on a per module basis. */
1717 ix86_return_pops_args (fundecl, funtype, size)
1722 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1724 /* Cdecl functions override -mrtd, and never pop the stack. */
1725 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1727 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1728 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1729 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1733 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1734 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1735 == void_type_node)))
1739 /* Lose any fake structure return argument if it is passed on the stack. */
1740 if (aggregate_value_p (TREE_TYPE (funtype))
1743 int nregs = ix86_fntype_regparm (funtype);
1746 return GET_MODE_SIZE (Pmode);
1752 /* Argument support functions. */
1754 /* Return true when register may be used to pass function parameters. */
1756 ix86_function_arg_regno_p (regno)
1761 return (regno < REGPARM_MAX
1762 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1763 if (SSE_REGNO_P (regno) && TARGET_SSE)
1765 /* RAX is used as hidden argument to va_arg functions. */
1768 for (i = 0; i < REGPARM_MAX; i++)
1769 if (regno == x86_64_int_parameter_registers[i])
1774 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1775 for a call to a function whose data type is FNTYPE.
1776 For a library call, FNTYPE is 0. */
1779 init_cumulative_args (cum, fntype, libname, fndecl)
1780 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1781 tree fntype; /* tree ptr for function decl */
1782 rtx libname; /* SYMBOL_REF of library name or 0 */
1785 static CUMULATIVE_ARGS zero_cum;
1786 tree param, next_param;
1787 bool user_convention = false;
1789 if (TARGET_DEBUG_ARG)
1791 fprintf (stderr, "\ninit_cumulative_args (");
1793 fprintf (stderr, "fntype code = %s, ret code = %s",
1794 tree_code_name[(int) TREE_CODE (fntype)],
1795 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1797 fprintf (stderr, "no fntype");
1800 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1805 /* Set up the number of registers to use for passing arguments. */
1806 cum->nregs = ix86_regparm;
1807 cum->sse_nregs = SSE_REGPARM_MAX;
1808 if (fntype && !TARGET_64BIT)
1810 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1814 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1815 user_convention = true;
1818 cum->maybe_vaarg = false;
1820 /* Use ecx and edx registers if function has fastcall attribute */
1821 if (fntype && !TARGET_64BIT)
1823 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1827 user_convention = true;
1831 /* Use register calling convention for local functions when possible. */
1832 if (!TARGET_64BIT && !user_convention && fndecl
1833 && flag_unit_at_a_time)
1835 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1838 /* We can't use regparm(3) for nested functions as these use
1839 static chain pointer in third argument. */
1840 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1848 /* Determine if this function has variable arguments. This is
1849 indicated by the last argument being 'void_type_mode' if there
1850 are no variable arguments. If there are variable arguments, then
1851 we won't pass anything in registers */
1855 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1856 param != 0; param = next_param)
1858 next_param = TREE_CHAIN (param);
1859 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1866 cum->maybe_vaarg = true;
1870 if ((!fntype && !libname)
1871 || (fntype && !TYPE_ARG_TYPES (fntype)))
1872 cum->maybe_vaarg = 1;
1874 if (TARGET_DEBUG_ARG)
1875 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1880 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1881 of this code is to classify each 8bytes of incoming argument by the register
1882 class and assign registers accordingly. */
1884 /* Return the union class of CLASS1 and CLASS2.
1885 See the x86-64 PS ABI for details. */
1887 static enum x86_64_reg_class
1888 merge_classes (class1, class2)
1889 enum x86_64_reg_class class1, class2;
1891 /* Rule #1: If both classes are equal, this is the resulting class. */
1892 if (class1 == class2)
1895 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1897 if (class1 == X86_64_NO_CLASS)
1899 if (class2 == X86_64_NO_CLASS)
1902 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1903 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1904 return X86_64_MEMORY_CLASS;
1906 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1907 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1908 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1909 return X86_64_INTEGERSI_CLASS;
1910 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1911 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1912 return X86_64_INTEGER_CLASS;
1914 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1915 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1916 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1917 return X86_64_MEMORY_CLASS;
1919 /* Rule #6: Otherwise class SSE is used. */
1920 return X86_64_SSE_CLASS;
1923 /* Classify the argument of type TYPE and mode MODE.
1924 CLASSES will be filled by the register class used to pass each word
1925 of the operand. The number of words is returned. In case the parameter
1926 should be passed in memory, 0 is returned. As a special case for zero
1927 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1929 BIT_OFFSET is used internally for handling records and specifies offset
1930 of the offset in bits modulo 256 to avoid overflow cases.
1932 See the x86-64 PS ABI for details.
1936 classify_argument (mode, type, classes, bit_offset)
1937 enum machine_mode mode;
1939 enum x86_64_reg_class classes[MAX_CLASSES];
1943 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1944 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1946 /* Variable sized entities are always passed/returned in memory. */
1950 if (mode != VOIDmode
1951 && MUST_PASS_IN_STACK (mode, type))
1954 if (type && AGGREGATE_TYPE_P (type))
1958 enum x86_64_reg_class subclasses[MAX_CLASSES];
1960 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1964 for (i = 0; i < words; i++)
1965 classes[i] = X86_64_NO_CLASS;
1967 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1968 signalize memory class, so handle it as special case. */
1971 classes[0] = X86_64_NO_CLASS;
1975 /* Classify each field of record and merge classes. */
1976 if (TREE_CODE (type) == RECORD_TYPE)
1978 /* For classes first merge in the field of the subclasses. */
1979 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1981 tree bases = TYPE_BINFO_BASETYPES (type);
1982 int n_bases = TREE_VEC_LENGTH (bases);
1985 for (i = 0; i < n_bases; ++i)
1987 tree binfo = TREE_VEC_ELT (bases, i);
1989 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1990 tree type = BINFO_TYPE (binfo);
1992 num = classify_argument (TYPE_MODE (type),
1994 (offset + bit_offset) % 256);
1997 for (i = 0; i < num; i++)
1999 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2001 merge_classes (subclasses[i], classes[i + pos]);
2005 /* And now merge the fields of structure. */
2006 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2008 if (TREE_CODE (field) == FIELD_DECL)
2012 /* Bitfields are always classified as integer. Handle them
2013 early, since later code would consider them to be
2014 misaligned integers. */
2015 if (DECL_BIT_FIELD (field))
2017 for (i = int_bit_position (field) / 8 / 8;
2018 i < (int_bit_position (field)
2019 + tree_low_cst (DECL_SIZE (field), 0)
2022 merge_classes (X86_64_INTEGER_CLASS,
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2028 TREE_TYPE (field), subclasses,
2029 (int_bit_position (field)
2030 + bit_offset) % 256);
2033 for (i = 0; i < num; i++)
2036 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2038 merge_classes (subclasses[i], classes[i + pos]);
2044 /* Arrays are handled as small records. */
2045 else if (TREE_CODE (type) == ARRAY_TYPE)
2048 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2049 TREE_TYPE (type), subclasses, bit_offset);
2053 /* The partial classes are now full classes. */
2054 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2055 subclasses[0] = X86_64_SSE_CLASS;
2056 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2057 subclasses[0] = X86_64_INTEGER_CLASS;
2059 for (i = 0; i < words; i++)
2060 classes[i] = subclasses[i % num];
2062 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2063 else if (TREE_CODE (type) == UNION_TYPE
2064 || TREE_CODE (type) == QUAL_UNION_TYPE)
2066 /* For classes first merge in the field of the subclasses. */
2067 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2069 tree bases = TYPE_BINFO_BASETYPES (type);
2070 int n_bases = TREE_VEC_LENGTH (bases);
2073 for (i = 0; i < n_bases; ++i)
2075 tree binfo = TREE_VEC_ELT (bases, i);
2077 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2078 tree type = BINFO_TYPE (binfo);
2080 num = classify_argument (TYPE_MODE (type),
2082 (offset + (bit_offset % 64)) % 256);
2085 for (i = 0; i < num; i++)
2087 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2089 merge_classes (subclasses[i], classes[i + pos]);
2093 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2095 if (TREE_CODE (field) == FIELD_DECL)
2098 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2099 TREE_TYPE (field), subclasses,
2103 for (i = 0; i < num; i++)
2104 classes[i] = merge_classes (subclasses[i], classes[i]);
2111 /* Final merger cleanup. */
2112 for (i = 0; i < words; i++)
2114 /* If one class is MEMORY, everything should be passed in
2116 if (classes[i] == X86_64_MEMORY_CLASS)
2119 /* The X86_64_SSEUP_CLASS should be always preceded by
2120 X86_64_SSE_CLASS. */
2121 if (classes[i] == X86_64_SSEUP_CLASS
2122 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2123 classes[i] = X86_64_SSE_CLASS;
2125 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2126 if (classes[i] == X86_64_X87UP_CLASS
2127 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2128 classes[i] = X86_64_SSE_CLASS;
2133 /* Compute alignment needed. We align all types to natural boundaries with
2134 exception of XFmode that is aligned to 64bits. */
2135 if (mode != VOIDmode && mode != BLKmode)
2137 int mode_alignment = GET_MODE_BITSIZE (mode);
2140 mode_alignment = 128;
2141 else if (mode == XCmode)
2142 mode_alignment = 256;
2143 /* Misaligned fields are always returned in memory. */
2144 if (bit_offset % mode_alignment)
2148 /* Classification of atomic types. */
2158 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2159 classes[0] = X86_64_INTEGERSI_CLASS;
2161 classes[0] = X86_64_INTEGER_CLASS;
2165 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2168 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2169 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2172 if (!(bit_offset % 64))
2173 classes[0] = X86_64_SSESF_CLASS;
2175 classes[0] = X86_64_SSE_CLASS;
2178 classes[0] = X86_64_SSEDF_CLASS;
2181 classes[0] = X86_64_X87_CLASS;
2182 classes[1] = X86_64_X87UP_CLASS;
2185 classes[0] = X86_64_X87_CLASS;
2186 classes[1] = X86_64_X87UP_CLASS;
2187 classes[2] = X86_64_X87_CLASS;
2188 classes[3] = X86_64_X87UP_CLASS;
2191 classes[0] = X86_64_SSEDF_CLASS;
2192 classes[1] = X86_64_SSEDF_CLASS;
2195 classes[0] = X86_64_SSE_CLASS;
2203 classes[0] = X86_64_SSE_CLASS;
2204 classes[1] = X86_64_SSEUP_CLASS;
2219 /* Examine the argument and return set number of register required in each
2220 class. Return 0 iff parameter should be passed in memory. */
2222 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2223 enum machine_mode mode;
2225 int *int_nregs, *sse_nregs;
2228 enum x86_64_reg_class class[MAX_CLASSES];
2229 int n = classify_argument (mode, type, class, 0);
2235 for (n--; n >= 0; n--)
2238 case X86_64_INTEGER_CLASS:
2239 case X86_64_INTEGERSI_CLASS:
2242 case X86_64_SSE_CLASS:
2243 case X86_64_SSESF_CLASS:
2244 case X86_64_SSEDF_CLASS:
2247 case X86_64_NO_CLASS:
2248 case X86_64_SSEUP_CLASS:
2250 case X86_64_X87_CLASS:
2251 case X86_64_X87UP_CLASS:
2255 case X86_64_MEMORY_CLASS:
2260 /* Construct container for the argument used by GCC interface. See
2261 FUNCTION_ARG for the detailed description. */
2263 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2264 enum machine_mode mode;
2267 int nintregs, nsseregs;
2271 enum machine_mode tmpmode;
2273 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2274 enum x86_64_reg_class class[MAX_CLASSES];
2278 int needed_sseregs, needed_intregs;
2279 rtx exp[MAX_CLASSES];
2282 n = classify_argument (mode, type, class, 0);
2283 if (TARGET_DEBUG_ARG)
2286 fprintf (stderr, "Memory class\n");
2289 fprintf (stderr, "Classes:");
2290 for (i = 0; i < n; i++)
2292 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2294 fprintf (stderr, "\n");
2299 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2301 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2304 /* First construct simple cases. Avoid SCmode, since we want to use
2305 single register to pass this type. */
2306 if (n == 1 && mode != SCmode)
2309 case X86_64_INTEGER_CLASS:
2310 case X86_64_INTEGERSI_CLASS:
2311 return gen_rtx_REG (mode, intreg[0]);
2312 case X86_64_SSE_CLASS:
2313 case X86_64_SSESF_CLASS:
2314 case X86_64_SSEDF_CLASS:
2315 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2316 case X86_64_X87_CLASS:
2317 return gen_rtx_REG (mode, FIRST_STACK_REG);
2318 case X86_64_NO_CLASS:
2319 /* Zero sized array, struct or class. */
2324 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2325 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2327 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2328 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2329 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2330 && class[1] == X86_64_INTEGER_CLASS
2331 && (mode == CDImode || mode == TImode)
2332 && intreg[0] + 1 == intreg[1])
2333 return gen_rtx_REG (mode, intreg[0]);
2335 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2336 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2337 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2339 /* Otherwise figure out the entries of the PARALLEL. */
2340 for (i = 0; i < n; i++)
2344 case X86_64_NO_CLASS:
2346 case X86_64_INTEGER_CLASS:
2347 case X86_64_INTEGERSI_CLASS:
2348 /* Merge TImodes on aligned occasions here too. */
2349 if (i * 8 + 8 > bytes)
2350 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2351 else if (class[i] == X86_64_INTEGERSI_CLASS)
2355 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2356 if (tmpmode == BLKmode)
2358 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2359 gen_rtx_REG (tmpmode, *intreg),
2363 case X86_64_SSESF_CLASS:
2364 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2365 gen_rtx_REG (SFmode,
2366 SSE_REGNO (sse_regno)),
2370 case X86_64_SSEDF_CLASS:
2371 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2372 gen_rtx_REG (DFmode,
2373 SSE_REGNO (sse_regno)),
2377 case X86_64_SSE_CLASS:
2378 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2382 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2383 gen_rtx_REG (tmpmode,
2384 SSE_REGNO (sse_regno)),
2386 if (tmpmode == TImode)
2394 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2395 for (i = 0; i < nexps; i++)
2396 XVECEXP (ret, 0, i) = exp [i];
2400 /* Update the data in CUM to advance over an argument
2401 of mode MODE and data type TYPE.
2402 (TYPE is null for libcalls where that information may not be available.) */
2405 function_arg_advance (cum, mode, type, named)
2406 CUMULATIVE_ARGS *cum; /* current arg information */
2407 enum machine_mode mode; /* current arg mode */
2408 tree type; /* type of the argument or 0 if lib support */
2409 int named; /* whether or not the argument was named */
2412 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2413 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2415 if (TARGET_DEBUG_ARG)
2417 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2418 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2421 int int_nregs, sse_nregs;
2422 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2423 cum->words += words;
2424 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2426 cum->nregs -= int_nregs;
2427 cum->sse_nregs -= sse_nregs;
2428 cum->regno += int_nregs;
2429 cum->sse_regno += sse_nregs;
2432 cum->words += words;
2436 if (TARGET_SSE && mode == TImode)
2438 cum->sse_words += words;
2439 cum->sse_nregs -= 1;
2440 cum->sse_regno += 1;
2441 if (cum->sse_nregs <= 0)
2449 cum->words += words;
2450 cum->nregs -= words;
2451 cum->regno += words;
2453 if (cum->nregs <= 0)
2463 /* Define where to put the arguments to a function.
2464 Value is zero to push the argument on the stack,
2465 or a hard register in which to store the argument.
2467 MODE is the argument's machine mode.
2468 TYPE is the data type of the argument (as a tree).
2469 This is null for libcalls where that information may
2471 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2472 the preceding args and about the function being called.
2473 NAMED is nonzero if this argument is a named parameter
2474 (otherwise it is an extra parameter matching an ellipsis). */
2477 function_arg (cum, mode, type, named)
2478 CUMULATIVE_ARGS *cum; /* current arg information */
2479 enum machine_mode mode; /* current arg mode */
2480 tree type; /* type of the argument or 0 if lib support */
2481 int named; /* != 0 for normal args, == 0 for ... args */
2485 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2486 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2488 /* Handle a hidden AL argument containing number of registers for varargs
2489 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2491 if (mode == VOIDmode)
2494 return GEN_INT (cum->maybe_vaarg
2495 ? (cum->sse_nregs < 0
2503 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2504 &x86_64_int_parameter_registers [cum->regno],
2509 /* For now, pass fp/complex values on the stack. */
2521 if (words <= cum->nregs)
2523 int regno = cum->regno;
2525 /* Fastcall allocates the first two DWORD (SImode) or
2526 smaller arguments to ECX and EDX. */
2529 if (mode == BLKmode || mode == DImode)
2532 /* ECX not EAX is the first allocated register. */
2536 ret = gen_rtx_REG (mode, regno);
2541 ret = gen_rtx_REG (mode, cum->sse_regno);
2545 if (TARGET_DEBUG_ARG)
2548 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2549 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2552 print_simple_rtl (stderr, ret);
2554 fprintf (stderr, ", stack");
2556 fprintf (stderr, " )\n");
2562 /* A C expression that indicates when an argument must be passed by
2563 reference. If nonzero for an argument, a copy of that argument is
2564 made in memory and a pointer to the argument is passed instead of
2565 the argument itself. The pointer is passed in whatever way is
2566 appropriate for passing a pointer to that type. */
2569 function_arg_pass_by_reference (cum, mode, type, named)
2570 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2571 enum machine_mode mode ATTRIBUTE_UNUSED;
2573 int named ATTRIBUTE_UNUSED;
2578 if (type && int_size_in_bytes (type) == -1)
2580 if (TARGET_DEBUG_ARG)
2581 fprintf (stderr, "function_arg_pass_by_reference\n");
2588 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2591 contains_128bit_aligned_vector_p (type)
2594 enum machine_mode mode = TYPE_MODE (type);
2595 if (SSE_REG_MODE_P (mode)
2596 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2598 if (TYPE_ALIGN (type) < 128)
2601 if (AGGREGATE_TYPE_P (type))
2603 /* Walk the agregates recursivly. */
2604 if (TREE_CODE (type) == RECORD_TYPE
2605 || TREE_CODE (type) == UNION_TYPE
2606 || TREE_CODE (type) == QUAL_UNION_TYPE)
2610 if (TYPE_BINFO (type) != NULL
2611 && TYPE_BINFO_BASETYPES (type) != NULL)
2613 tree bases = TYPE_BINFO_BASETYPES (type);
2614 int n_bases = TREE_VEC_LENGTH (bases);
2617 for (i = 0; i < n_bases; ++i)
2619 tree binfo = TREE_VEC_ELT (bases, i);
2620 tree type = BINFO_TYPE (binfo);
2622 if (contains_128bit_aligned_vector_p (type))
2626 /* And now merge the fields of structure. */
2627 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2629 if (TREE_CODE (field) == FIELD_DECL
2630 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2634 /* Just for use if some languages passes arrays by value. */
2635 else if (TREE_CODE (type) == ARRAY_TYPE)
2637 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2646 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2650 ix86_function_arg_boundary (mode, type)
2651 enum machine_mode mode;
2656 align = TYPE_ALIGN (type);
2658 align = GET_MODE_ALIGNMENT (mode);
2659 if (align < PARM_BOUNDARY)
2660 align = PARM_BOUNDARY;
2663 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2664 make an exception for SSE modes since these require 128bit
2667 The handling here differs from field_alignment. ICC aligns MMX
2668 arguments to 4 byte boundaries, while structure fields are aligned
2669 to 8 byte boundaries. */
2672 if (!SSE_REG_MODE_P (mode))
2673 align = PARM_BOUNDARY;
2677 if (!contains_128bit_aligned_vector_p (type))
2678 align = PARM_BOUNDARY;
2680 if (align != PARM_BOUNDARY && !TARGET_SSE)
2688 /* Return true if N is a possible register number of function value. */
2690 ix86_function_value_regno_p (regno)
2695 return ((regno) == 0
2696 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2697 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2699 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2700 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2701 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2704 /* Define how to find the value returned by a function.
2705 VALTYPE is the data type of the value (as a tree).
2706 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2707 otherwise, FUNC is 0. */
2709 ix86_function_value (valtype)
2714 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2715 REGPARM_MAX, SSE_REGPARM_MAX,
2716 x86_64_int_return_registers, 0);
2717 /* For zero sized structures, construct_container return NULL, but we need
2718 to keep rest of compiler happy by returning meaningful value. */
2720 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2724 return gen_rtx_REG (TYPE_MODE (valtype),
2725 ix86_value_regno (TYPE_MODE (valtype)));
2728 /* Return false iff type is returned in memory. */
2730 ix86_return_in_memory (type)
2733 int needed_intregs, needed_sseregs;
2736 return !examine_argument (TYPE_MODE (type), type, 1,
2737 &needed_intregs, &needed_sseregs);
2741 if (TYPE_MODE (type) == BLKmode)
2743 else if (MS_AGGREGATE_RETURN
2744 && AGGREGATE_TYPE_P (type)
2745 && int_size_in_bytes(type) <= 8)
2747 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2748 && int_size_in_bytes (type) == 8)
2749 || (int_size_in_bytes (type) > 12
2750 && TYPE_MODE (type) != TImode
2751 && TYPE_MODE (type) != TFmode
2752 && !VECTOR_MODE_P (TYPE_MODE (type))))
2758 /* Define how to find the value returned by a library function
2759 assuming the value has mode MODE. */
2761 ix86_libcall_value (mode)
2762 enum machine_mode mode;
2772 return gen_rtx_REG (mode, FIRST_SSE_REG);
2775 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2777 return gen_rtx_REG (mode, 0);
2781 return gen_rtx_REG (mode, ix86_value_regno (mode));
2784 /* Given a mode, return the register to use for a return value. */
2787 ix86_value_regno (mode)
2788 enum machine_mode mode;
2790 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2791 return FIRST_FLOAT_REG;
2792 if (mode == TImode || VECTOR_MODE_P (mode))
2793 return FIRST_SSE_REG;
2797 /* Create the va_list data type. */
2800 ix86_build_va_list ()
2802 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2804 /* For i386 we use plain pointer to argument area. */
2806 return build_pointer_type (char_type_node);
2808 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2809 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2811 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2812 unsigned_type_node);
2813 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2814 unsigned_type_node);
2815 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2817 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2820 DECL_FIELD_CONTEXT (f_gpr) = record;
2821 DECL_FIELD_CONTEXT (f_fpr) = record;
2822 DECL_FIELD_CONTEXT (f_ovf) = record;
2823 DECL_FIELD_CONTEXT (f_sav) = record;
2825 TREE_CHAIN (record) = type_decl;
2826 TYPE_NAME (record) = type_decl;
2827 TYPE_FIELDS (record) = f_gpr;
2828 TREE_CHAIN (f_gpr) = f_fpr;
2829 TREE_CHAIN (f_fpr) = f_ovf;
2830 TREE_CHAIN (f_ovf) = f_sav;
2832 layout_type (record);
2834 /* The correct type is an array type of one element. */
2835 return build_array_type (record, build_index_type (size_zero_node));
2838 /* Perform any needed actions needed for a function that is receiving a
2839 variable number of arguments.
2843 MODE and TYPE are the mode and type of the current parameter.
2845 PRETEND_SIZE is a variable that should be set to the amount of stack
2846 that must be pushed by the prolog to pretend that our caller pushed
2849 Normally, this macro will push all remaining incoming registers on the
2850 stack and set PRETEND_SIZE to the length of the registers pushed. */
2853 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2854 CUMULATIVE_ARGS *cum;
2855 enum machine_mode mode;
2857 int *pretend_size ATTRIBUTE_UNUSED;
2861 CUMULATIVE_ARGS next_cum;
2862 rtx save_area = NULL_RTX, mem;
2875 /* Indicate to allocate space on the stack for varargs save area. */
2876 ix86_save_varrargs_registers = 1;
2878 cfun->stack_alignment_needed = 128;
2880 fntype = TREE_TYPE (current_function_decl);
2881 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2882 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2883 != void_type_node));
2885 /* For varargs, we do not want to skip the dummy va_dcl argument.
2886 For stdargs, we do want to skip the last named argument. */
2889 function_arg_advance (&next_cum, mode, type, 1);
2892 save_area = frame_pointer_rtx;
2894 set = get_varargs_alias_set ();
2896 for (i = next_cum.regno; i < ix86_regparm; i++)
2898 mem = gen_rtx_MEM (Pmode,
2899 plus_constant (save_area, i * UNITS_PER_WORD));
2900 set_mem_alias_set (mem, set);
2901 emit_move_insn (mem, gen_rtx_REG (Pmode,
2902 x86_64_int_parameter_registers[i]));
2905 if (next_cum.sse_nregs)
2907 /* Now emit code to save SSE registers. The AX parameter contains number
2908 of SSE parameter registers used to call this function. We use
2909 sse_prologue_save insn template that produces computed jump across
2910 SSE saves. We need some preparation work to get this working. */
2912 label = gen_label_rtx ();
2913 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2915 /* Compute address to jump to :
2916 label - 5*eax + nnamed_sse_arguments*5 */
2917 tmp_reg = gen_reg_rtx (Pmode);
2918 nsse_reg = gen_reg_rtx (Pmode);
2919 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2920 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2921 gen_rtx_MULT (Pmode, nsse_reg,
2923 if (next_cum.sse_regno)
2926 gen_rtx_CONST (DImode,
2927 gen_rtx_PLUS (DImode,
2929 GEN_INT (next_cum.sse_regno * 4))));
2931 emit_move_insn (nsse_reg, label_ref);
2932 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2934 /* Compute address of memory block we save into. We always use pointer
2935 pointing 127 bytes after first byte to store - this is needed to keep
2936 instruction size limited by 4 bytes. */
2937 tmp_reg = gen_reg_rtx (Pmode);
2938 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2939 plus_constant (save_area,
2940 8 * REGPARM_MAX + 127)));
2941 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2942 set_mem_alias_set (mem, set);
2943 set_mem_align (mem, BITS_PER_WORD);
2945 /* And finally do the dirty job! */
2946 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2947 GEN_INT (next_cum.sse_regno), label));
2952 /* Implement va_start. */
2955 ix86_va_start (valist, nextarg)
2959 HOST_WIDE_INT words, n_gpr, n_fpr;
2960 tree f_gpr, f_fpr, f_ovf, f_sav;
2961 tree gpr, fpr, ovf, sav, t;
2963 /* Only 64bit target needs something special. */
2966 std_expand_builtin_va_start (valist, nextarg);
2970 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2971 f_fpr = TREE_CHAIN (f_gpr);
2972 f_ovf = TREE_CHAIN (f_fpr);
2973 f_sav = TREE_CHAIN (f_ovf);
2975 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2976 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2977 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2978 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2979 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2981 /* Count number of gp and fp argument registers used. */
2982 words = current_function_args_info.words;
2983 n_gpr = current_function_args_info.regno;
2984 n_fpr = current_function_args_info.sse_regno;
2986 if (TARGET_DEBUG_ARG)
2987 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2988 (int) words, (int) n_gpr, (int) n_fpr);
2990 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2991 build_int_2 (n_gpr * 8, 0));
2992 TREE_SIDE_EFFECTS (t) = 1;
2993 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2995 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2996 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2997 TREE_SIDE_EFFECTS (t) = 1;
2998 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3000 /* Find the overflow area. */
3001 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3003 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3004 build_int_2 (words * UNITS_PER_WORD, 0));
3005 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3006 TREE_SIDE_EFFECTS (t) = 1;
3007 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3009 /* Find the register save area.
3010 Prologue of the function save it right above stack frame. */
3011 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3012 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3013 TREE_SIDE_EFFECTS (t) = 1;
3014 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3017 /* Implement va_arg. */
3019 ix86_va_arg (valist, type)
3022 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3023 tree f_gpr, f_fpr, f_ovf, f_sav;
3024 tree gpr, fpr, ovf, sav, t;
3026 rtx lab_false, lab_over = NULL_RTX;
3031 /* Only 64bit target needs something special. */
3034 return std_expand_builtin_va_arg (valist, type);
3037 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3038 f_fpr = TREE_CHAIN (f_gpr);
3039 f_ovf = TREE_CHAIN (f_fpr);
3040 f_sav = TREE_CHAIN (f_ovf);
3042 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3043 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3044 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3045 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3046 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3048 size = int_size_in_bytes (type);
3051 /* Passed by reference. */
3053 type = build_pointer_type (type);
3054 size = int_size_in_bytes (type);
3056 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3058 container = construct_container (TYPE_MODE (type), type, 0,
3059 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3061 * Pull the value out of the saved registers ...
3064 addr_rtx = gen_reg_rtx (Pmode);
3068 rtx int_addr_rtx, sse_addr_rtx;
3069 int needed_intregs, needed_sseregs;
3072 lab_over = gen_label_rtx ();
3073 lab_false = gen_label_rtx ();
3075 examine_argument (TYPE_MODE (type), type, 0,
3076 &needed_intregs, &needed_sseregs);
3079 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3080 || TYPE_ALIGN (type) > 128);
3082 /* In case we are passing structure, verify that it is consecutive block
3083 on the register save area. If not we need to do moves. */
3084 if (!need_temp && !REG_P (container))
3086 /* Verify that all registers are strictly consecutive */
3087 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3091 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3093 rtx slot = XVECEXP (container, 0, i);
3094 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3095 || INTVAL (XEXP (slot, 1)) != i * 16)
3103 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3105 rtx slot = XVECEXP (container, 0, i);
3106 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3107 || INTVAL (XEXP (slot, 1)) != i * 8)
3114 int_addr_rtx = addr_rtx;
3115 sse_addr_rtx = addr_rtx;
3119 int_addr_rtx = gen_reg_rtx (Pmode);
3120 sse_addr_rtx = gen_reg_rtx (Pmode);
3122 /* First ensure that we fit completely in registers. */
3125 emit_cmp_and_jump_insns (expand_expr
3126 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3127 GEN_INT ((REGPARM_MAX - needed_intregs +
3128 1) * 8), GE, const1_rtx, SImode,
3133 emit_cmp_and_jump_insns (expand_expr
3134 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3135 GEN_INT ((SSE_REGPARM_MAX -
3136 needed_sseregs + 1) * 16 +
3137 REGPARM_MAX * 8), GE, const1_rtx,
3138 SImode, 1, lab_false);
3141 /* Compute index to start of area used for integer regs. */
3144 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3145 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3146 if (r != int_addr_rtx)
3147 emit_move_insn (int_addr_rtx, r);
3151 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3152 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3153 if (r != sse_addr_rtx)
3154 emit_move_insn (sse_addr_rtx, r);
3162 /* Never use the memory itself, as it has the alias set. */
3163 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3164 mem = gen_rtx_MEM (BLKmode, x);
3165 force_operand (x, addr_rtx);
3166 set_mem_alias_set (mem, get_varargs_alias_set ());
3167 set_mem_align (mem, BITS_PER_UNIT);
3169 for (i = 0; i < XVECLEN (container, 0); i++)
3171 rtx slot = XVECEXP (container, 0, i);
3172 rtx reg = XEXP (slot, 0);
3173 enum machine_mode mode = GET_MODE (reg);
3179 if (SSE_REGNO_P (REGNO (reg)))
3181 src_addr = sse_addr_rtx;
3182 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3186 src_addr = int_addr_rtx;
3187 src_offset = REGNO (reg) * 8;
3189 src_mem = gen_rtx_MEM (mode, src_addr);
3190 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3191 src_mem = adjust_address (src_mem, mode, src_offset);
3192 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3193 emit_move_insn (dest_mem, src_mem);
3200 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3201 build_int_2 (needed_intregs * 8, 0));
3202 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3203 TREE_SIDE_EFFECTS (t) = 1;
3204 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3209 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3210 build_int_2 (needed_sseregs * 16, 0));
3211 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3212 TREE_SIDE_EFFECTS (t) = 1;
3213 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3216 emit_jump_insn (gen_jump (lab_over));
3218 emit_label (lab_false);
3221 /* ... otherwise out of the overflow area. */
3223 /* Care for on-stack alignment if needed. */
3224 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3228 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3229 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3230 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3234 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3236 emit_move_insn (addr_rtx, r);
3239 build (PLUS_EXPR, TREE_TYPE (t), t,
3240 build_int_2 (rsize * UNITS_PER_WORD, 0));
3241 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3242 TREE_SIDE_EFFECTS (t) = 1;
3243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3246 emit_label (lab_over);
3250 r = gen_rtx_MEM (Pmode, addr_rtx);
3251 set_mem_alias_set (r, get_varargs_alias_set ());
3252 emit_move_insn (addr_rtx, r);
3258 /* Return nonzero if OP is either a i387 or SSE fp register. */
3260 any_fp_register_operand (op, mode)
3262 enum machine_mode mode ATTRIBUTE_UNUSED;
3264 return ANY_FP_REG_P (op);
3267 /* Return nonzero if OP is an i387 fp register. */
3269 fp_register_operand (op, mode)
3271 enum machine_mode mode ATTRIBUTE_UNUSED;
3273 return FP_REG_P (op);
3276 /* Return nonzero if OP is a non-fp register_operand. */
3278 register_and_not_any_fp_reg_operand (op, mode)
3280 enum machine_mode mode;
3282 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3285 /* Return nonzero if OP is a register operand other than an
3286 i387 fp register. */
3288 register_and_not_fp_reg_operand (op, mode)
3290 enum machine_mode mode;
3292 return register_operand (op, mode) && !FP_REG_P (op);
3295 /* Return nonzero if OP is general operand representable on x86_64. */
3298 x86_64_general_operand (op, mode)
3300 enum machine_mode mode;
3303 return general_operand (op, mode);
3304 if (nonimmediate_operand (op, mode))
3306 return x86_64_sign_extended_value (op);
3309 /* Return nonzero if OP is general operand representable on x86_64
3310 as either sign extended or zero extended constant. */
3313 x86_64_szext_general_operand (op, mode)
3315 enum machine_mode mode;
3318 return general_operand (op, mode);
3319 if (nonimmediate_operand (op, mode))
3321 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3324 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3327 x86_64_nonmemory_operand (op, mode)
3329 enum machine_mode mode;
3332 return nonmemory_operand (op, mode);
3333 if (register_operand (op, mode))
3335 return x86_64_sign_extended_value (op);
3338 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3341 x86_64_movabs_operand (op, mode)
3343 enum machine_mode mode;
3345 if (!TARGET_64BIT || !flag_pic)
3346 return nonmemory_operand (op, mode);
3347 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3349 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3354 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3357 x86_64_szext_nonmemory_operand (op, mode)
3359 enum machine_mode mode;
3362 return nonmemory_operand (op, mode);
3363 if (register_operand (op, mode))
3365 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3368 /* Return nonzero if OP is immediate operand representable on x86_64. */
3371 x86_64_immediate_operand (op, mode)
3373 enum machine_mode mode;
3376 return immediate_operand (op, mode);
3377 return x86_64_sign_extended_value (op);
3380 /* Return nonzero if OP is immediate operand representable on x86_64. */
3383 x86_64_zext_immediate_operand (op, mode)
3385 enum machine_mode mode ATTRIBUTE_UNUSED;
3387 return x86_64_zero_extended_value (op);
3390 /* Return nonzero if OP is (const_int 1), else return zero. */
3393 const_int_1_operand (op, mode)
3395 enum machine_mode mode ATTRIBUTE_UNUSED;
3397 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3400 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3401 for shift & compare patterns, as shifting by 0 does not change flags),
3402 else return zero. */
3405 const_int_1_31_operand (op, mode)
3407 enum machine_mode mode ATTRIBUTE_UNUSED;
3409 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3412 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3413 reference and a constant. */
3416 symbolic_operand (op, mode)
3418 enum machine_mode mode ATTRIBUTE_UNUSED;
3420 switch (GET_CODE (op))
3428 if (GET_CODE (op) == SYMBOL_REF
3429 || GET_CODE (op) == LABEL_REF
3430 || (GET_CODE (op) == UNSPEC
3431 && (XINT (op, 1) == UNSPEC_GOT
3432 || XINT (op, 1) == UNSPEC_GOTOFF
3433 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3435 if (GET_CODE (op) != PLUS
3436 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3440 if (GET_CODE (op) == SYMBOL_REF
3441 || GET_CODE (op) == LABEL_REF)
3443 /* Only @GOTOFF gets offsets. */
3444 if (GET_CODE (op) != UNSPEC
3445 || XINT (op, 1) != UNSPEC_GOTOFF)
3448 op = XVECEXP (op, 0, 0);
3449 if (GET_CODE (op) == SYMBOL_REF
3450 || GET_CODE (op) == LABEL_REF)
3459 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3462 pic_symbolic_operand (op, mode)
3464 enum machine_mode mode ATTRIBUTE_UNUSED;
3466 if (GET_CODE (op) != CONST)
3471 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3476 if (GET_CODE (op) == UNSPEC)
3478 if (GET_CODE (op) != PLUS
3479 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3482 if (GET_CODE (op) == UNSPEC)
3488 /* Return true if OP is a symbolic operand that resolves locally. */
3491 local_symbolic_operand (op, mode)
3493 enum machine_mode mode ATTRIBUTE_UNUSED;
3495 if (GET_CODE (op) == CONST
3496 && GET_CODE (XEXP (op, 0)) == PLUS
3497 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3498 op = XEXP (XEXP (op, 0), 0);
3500 if (GET_CODE (op) == LABEL_REF)
3503 if (GET_CODE (op) != SYMBOL_REF)
3506 if (SYMBOL_REF_LOCAL_P (op))
3509 /* There is, however, a not insubstantial body of code in the rest of
3510 the compiler that assumes it can just stick the results of
3511 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3512 /* ??? This is a hack. Should update the body of the compiler to
3513 always create a DECL an invoke targetm.encode_section_info. */
3514 if (strncmp (XSTR (op, 0), internal_label_prefix,
3515 internal_label_prefix_len) == 0)
3521 /* Test for various thread-local symbols. */
3524 tls_symbolic_operand (op, mode)
3526 enum machine_mode mode ATTRIBUTE_UNUSED;
3528 if (GET_CODE (op) != SYMBOL_REF)
3530 return SYMBOL_REF_TLS_MODEL (op);
3534 tls_symbolic_operand_1 (op, kind)
3536 enum tls_model kind;
3538 if (GET_CODE (op) != SYMBOL_REF)
3540 return SYMBOL_REF_TLS_MODEL (op) == kind;
3544 global_dynamic_symbolic_operand (op, mode)
3546 enum machine_mode mode ATTRIBUTE_UNUSED;
3548 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3552 local_dynamic_symbolic_operand (op, mode)
3554 enum machine_mode mode ATTRIBUTE_UNUSED;
3556 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3560 initial_exec_symbolic_operand (op, mode)
3562 enum machine_mode mode ATTRIBUTE_UNUSED;
3564 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3568 local_exec_symbolic_operand (op, mode)
3570 enum machine_mode mode ATTRIBUTE_UNUSED;
3572 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3575 /* Test for a valid operand for a call instruction. Don't allow the
3576 arg pointer register or virtual regs since they may decay into
3577 reg + const, which the patterns can't handle. */
3580 call_insn_operand (op, mode)
3582 enum machine_mode mode ATTRIBUTE_UNUSED;
3584 /* Disallow indirect through a virtual register. This leads to
3585 compiler aborts when trying to eliminate them. */
3586 if (GET_CODE (op) == REG
3587 && (op == arg_pointer_rtx
3588 || op == frame_pointer_rtx
3589 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3590 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3593 /* Disallow `call 1234'. Due to varying assembler lameness this
3594 gets either rejected or translated to `call .+1234'. */
3595 if (GET_CODE (op) == CONST_INT)
3598 /* Explicitly allow SYMBOL_REF even if pic. */
3599 if (GET_CODE (op) == SYMBOL_REF)
3602 /* Otherwise we can allow any general_operand in the address. */
3603 return general_operand (op, Pmode);
3606 /* Test for a valid operand for a call instruction. Don't allow the
3607 arg pointer register or virtual regs since they may decay into
3608 reg + const, which the patterns can't handle. */
3611 sibcall_insn_operand (op, mode)
3613 enum machine_mode mode ATTRIBUTE_UNUSED;
3615 /* Disallow indirect through a virtual register. This leads to
3616 compiler aborts when trying to eliminate them. */
3617 if (GET_CODE (op) == REG
3618 && (op == arg_pointer_rtx
3619 || op == frame_pointer_rtx
3620 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3621 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3624 /* Explicitly allow SYMBOL_REF even if pic. */
3625 if (GET_CODE (op) == SYMBOL_REF)
3628 /* Otherwise we can only allow register operands. */
3629 return register_operand (op, Pmode);
3633 constant_call_address_operand (op, mode)
3635 enum machine_mode mode ATTRIBUTE_UNUSED;
3637 if (GET_CODE (op) == CONST
3638 && GET_CODE (XEXP (op, 0)) == PLUS
3639 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3640 op = XEXP (XEXP (op, 0), 0);
3641 return GET_CODE (op) == SYMBOL_REF;
3644 /* Match exactly zero and one. */
3647 const0_operand (op, mode)
3649 enum machine_mode mode;
3651 return op == CONST0_RTX (mode);
3655 const1_operand (op, mode)
3657 enum machine_mode mode ATTRIBUTE_UNUSED;
3659 return op == const1_rtx;
3662 /* Match 2, 4, or 8. Used for leal multiplicands. */
3665 const248_operand (op, mode)
3667 enum machine_mode mode ATTRIBUTE_UNUSED;
3669 return (GET_CODE (op) == CONST_INT
3670 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3673 /* True if this is a constant appropriate for an increment or decrement. */
3676 incdec_operand (op, mode)
3678 enum machine_mode mode ATTRIBUTE_UNUSED;
3680 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3681 registers, since carry flag is not set. */
3682 if (TARGET_PENTIUM4 && !optimize_size)
3684 return op == const1_rtx || op == constm1_rtx;
3687 /* Return nonzero if OP is acceptable as operand of DImode shift
3691 shiftdi_operand (op, mode)
3693 enum machine_mode mode ATTRIBUTE_UNUSED;
3696 return nonimmediate_operand (op, mode);
3698 return register_operand (op, mode);
3701 /* Return false if this is the stack pointer, or any other fake
3702 register eliminable to the stack pointer. Otherwise, this is
3705 This is used to prevent esp from being used as an index reg.
3706 Which would only happen in pathological cases. */
3709 reg_no_sp_operand (op, mode)
3711 enum machine_mode mode;
3714 if (GET_CODE (t) == SUBREG)
3716 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3719 return register_operand (op, mode);
3723 mmx_reg_operand (op, mode)
3725 enum machine_mode mode ATTRIBUTE_UNUSED;
3727 return MMX_REG_P (op);
3730 /* Return false if this is any eliminable register. Otherwise
3734 general_no_elim_operand (op, mode)
3736 enum machine_mode mode;
3739 if (GET_CODE (t) == SUBREG)
3741 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3742 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3743 || t == virtual_stack_dynamic_rtx)
3746 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3747 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3750 return general_operand (op, mode);
3753 /* Return false if this is any eliminable register. Otherwise
3754 register_operand or const_int. */
3757 nonmemory_no_elim_operand (op, mode)
3759 enum machine_mode mode;
3762 if (GET_CODE (t) == SUBREG)
3764 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3765 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3766 || t == virtual_stack_dynamic_rtx)
3769 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3772 /* Return false if this is any eliminable register or stack register,
3773 otherwise work like register_operand. */
3776 index_register_operand (op, mode)
3778 enum machine_mode mode;
3781 if (GET_CODE (t) == SUBREG)
3785 if (t == arg_pointer_rtx
3786 || t == frame_pointer_rtx
3787 || t == virtual_incoming_args_rtx
3788 || t == virtual_stack_vars_rtx
3789 || t == virtual_stack_dynamic_rtx
3790 || REGNO (t) == STACK_POINTER_REGNUM)
3793 return general_operand (op, mode);
3796 /* Return true if op is a Q_REGS class register. */
3799 q_regs_operand (op, mode)
3801 enum machine_mode mode;
3803 if (mode != VOIDmode && GET_MODE (op) != mode)
3805 if (GET_CODE (op) == SUBREG)
3806 op = SUBREG_REG (op);
3807 return ANY_QI_REG_P (op);
3810 /* Return true if op is an flags register. */
3813 flags_reg_operand (op, mode)
3815 enum machine_mode mode;
3817 if (mode != VOIDmode && GET_MODE (op) != mode)
3819 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3822 /* Return true if op is a NON_Q_REGS class register. */
3825 non_q_regs_operand (op, mode)
3827 enum machine_mode mode;
3829 if (mode != VOIDmode && GET_MODE (op) != mode)
3831 if (GET_CODE (op) == SUBREG)
3832 op = SUBREG_REG (op);
3833 return NON_QI_REG_P (op);
3837 zero_extended_scalar_load_operand (op, mode)
3839 enum machine_mode mode ATTRIBUTE_UNUSED;
3842 if (GET_CODE (op) != MEM)
3844 op = maybe_get_pool_constant (op);
3847 if (GET_CODE (op) != CONST_VECTOR)
3850 (GET_MODE_SIZE (GET_MODE (op)) /
3851 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3852 for (n_elts--; n_elts > 0; n_elts--)
3854 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3855 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3861 /* Return 1 when OP is operand acceptable for standard SSE move. */
3863 vector_move_operand (op, mode)
3865 enum machine_mode mode;
3867 if (nonimmediate_operand (op, mode))
3869 if (GET_MODE (op) != mode && mode != VOIDmode)
3871 return (op == CONST0_RTX (GET_MODE (op)));
3874 /* Return true if op if a valid address, and does not contain
3875 a segment override. */
3878 no_seg_address_operand (op, mode)
3880 enum machine_mode mode;
3882 struct ix86_address parts;
3884 if (! address_operand (op, mode))
3887 if (! ix86_decompose_address (op, &parts))
3890 return parts.seg == SEG_DEFAULT;
3893 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3896 sse_comparison_operator (op, mode)
3898 enum machine_mode mode ATTRIBUTE_UNUSED;
3900 enum rtx_code code = GET_CODE (op);
3903 /* Operations supported directly. */
3913 /* These are equivalent to ones above in non-IEEE comparisons. */
3920 return !TARGET_IEEE_FP;
3925 /* Return 1 if OP is a valid comparison operator in valid mode. */
3927 ix86_comparison_operator (op, mode)
3929 enum machine_mode mode;
3931 enum machine_mode inmode;
3932 enum rtx_code code = GET_CODE (op);
3933 if (mode != VOIDmode && GET_MODE (op) != mode)
3935 if (GET_RTX_CLASS (code) != '<')
3937 inmode = GET_MODE (XEXP (op, 0));
3939 if (inmode == CCFPmode || inmode == CCFPUmode)
3941 enum rtx_code second_code, bypass_code;
3942 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3943 return (bypass_code == NIL && second_code == NIL);
3950 if (inmode == CCmode || inmode == CCGCmode
3951 || inmode == CCGOCmode || inmode == CCNOmode)
3954 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3955 if (inmode == CCmode)
3959 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3967 /* Return 1 if OP is a valid comparison operator testing carry flag
3970 ix86_carry_flag_operator (op, mode)
3972 enum machine_mode mode;
3974 enum machine_mode inmode;
3975 enum rtx_code code = GET_CODE (op);
3977 if (mode != VOIDmode && GET_MODE (op) != mode)
3979 if (GET_RTX_CLASS (code) != '<')
3981 inmode = GET_MODE (XEXP (op, 0));
3982 if (GET_CODE (XEXP (op, 0)) != REG
3983 || REGNO (XEXP (op, 0)) != 17
3984 || XEXP (op, 1) != const0_rtx)
3987 if (inmode == CCFPmode || inmode == CCFPUmode)
3989 enum rtx_code second_code, bypass_code;
3991 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3992 if (bypass_code != NIL || second_code != NIL)
3994 code = ix86_fp_compare_code_to_integer (code);
3996 else if (inmode != CCmode)
4001 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4004 fcmov_comparison_operator (op, mode)
4006 enum machine_mode mode;
4008 enum machine_mode inmode;
4009 enum rtx_code code = GET_CODE (op);
4011 if (mode != VOIDmode && GET_MODE (op) != mode)
4013 if (GET_RTX_CLASS (code) != '<')
4015 inmode = GET_MODE (XEXP (op, 0));
4016 if (inmode == CCFPmode || inmode == CCFPUmode)
4018 enum rtx_code second_code, bypass_code;
4020 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4021 if (bypass_code != NIL || second_code != NIL)
4023 code = ix86_fp_compare_code_to_integer (code);
4025 /* i387 supports just limited amount of conditional codes. */
4028 case LTU: case GTU: case LEU: case GEU:
4029 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4032 case ORDERED: case UNORDERED:
4040 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4043 promotable_binary_operator (op, mode)
4045 enum machine_mode mode ATTRIBUTE_UNUSED;
4047 switch (GET_CODE (op))
4050 /* Modern CPUs have same latency for HImode and SImode multiply,
4051 but 386 and 486 do HImode multiply faster. */
4052 return ix86_tune > PROCESSOR_I486;
4064 /* Nearly general operand, but accept any const_double, since we wish
4065 to be able to drop them into memory rather than have them get pulled
4069 cmp_fp_expander_operand (op, mode)
4071 enum machine_mode mode;
4073 if (mode != VOIDmode && mode != GET_MODE (op))
4075 if (GET_CODE (op) == CONST_DOUBLE)
4077 return general_operand (op, mode);
4080 /* Match an SI or HImode register for a zero_extract. */
4083 ext_register_operand (op, mode)
4085 enum machine_mode mode ATTRIBUTE_UNUSED;
4088 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4089 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4092 if (!register_operand (op, VOIDmode))
4095 /* Be careful to accept only registers having upper parts. */
4096 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4097 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4100 /* Return 1 if this is a valid binary floating-point operation.
4101 OP is the expression matched, and MODE is its mode. */
4104 binary_fp_operator (op, mode)
4106 enum machine_mode mode;
4108 if (mode != VOIDmode && mode != GET_MODE (op))
4111 switch (GET_CODE (op))
4117 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4125 mult_operator (op, mode)
4127 enum machine_mode mode ATTRIBUTE_UNUSED;
4129 return GET_CODE (op) == MULT;
4133 div_operator (op, mode)
4135 enum machine_mode mode ATTRIBUTE_UNUSED;
4137 return GET_CODE (op) == DIV;
4141 arith_or_logical_operator (op, mode)
4143 enum machine_mode mode;
4145 return ((mode == VOIDmode || GET_MODE (op) == mode)
4146 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4147 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4150 /* Returns 1 if OP is memory operand with a displacement. */
4153 memory_displacement_operand (op, mode)
4155 enum machine_mode mode;
4157 struct ix86_address parts;
4159 if (! memory_operand (op, mode))
4162 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4165 return parts.disp != NULL_RTX;
4168 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4169 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4171 ??? It seems likely that this will only work because cmpsi is an
4172 expander, and no actual insns use this. */
4175 cmpsi_operand (op, mode)
4177 enum machine_mode mode;
4179 if (nonimmediate_operand (op, mode))
4182 if (GET_CODE (op) == AND
4183 && GET_MODE (op) == SImode
4184 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4185 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4186 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4187 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4188 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4189 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4195 /* Returns 1 if OP is memory operand that can not be represented by the
4199 long_memory_operand (op, mode)
4201 enum machine_mode mode;
4203 if (! memory_operand (op, mode))
4206 return memory_address_length (op) != 0;
4209 /* Return nonzero if the rtx is known aligned. */
4212 aligned_operand (op, mode)
4214 enum machine_mode mode;
4216 struct ix86_address parts;
4218 if (!general_operand (op, mode))
4221 /* Registers and immediate operands are always "aligned". */
4222 if (GET_CODE (op) != MEM)
4225 /* Don't even try to do any aligned optimizations with volatiles. */
4226 if (MEM_VOLATILE_P (op))
4231 /* Pushes and pops are only valid on the stack pointer. */
4232 if (GET_CODE (op) == PRE_DEC
4233 || GET_CODE (op) == POST_INC)
4236 /* Decode the address. */
4237 if (! ix86_decompose_address (op, &parts))
4240 if (parts.base && GET_CODE (parts.base) == SUBREG)
4241 parts.base = SUBREG_REG (parts.base);
4242 if (parts.index && GET_CODE (parts.index) == SUBREG)
4243 parts.index = SUBREG_REG (parts.index);
4245 /* Look for some component that isn't known to be aligned. */
4249 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4254 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4259 if (GET_CODE (parts.disp) != CONST_INT
4260 || (INTVAL (parts.disp) & 3) != 0)
4264 /* Didn't find one -- this must be an aligned address. */
4268 /* Initialize the table of extra 80387 mathematical constants. */
4271 init_ext_80387_constants ()
4273 static const char * cst[5] =
4275 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4276 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4277 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4278 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4279 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4283 for (i = 0; i < 5; i++)
4285 real_from_string (&ext_80387_constants_table[i], cst[i]);
4286 /* Ensure each constant is rounded to XFmode precision. */
4287 real_convert (&ext_80387_constants_table[i], XFmode,
4288 &ext_80387_constants_table[i]);
4291 ext_80387_constants_init = 1;
4294 /* Return true if the constant is something that can be loaded with
4295 a special instruction. */
4298 standard_80387_constant_p (x)
4301 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4304 if (x == CONST0_RTX (GET_MODE (x)))
4306 if (x == CONST1_RTX (GET_MODE (x)))
4309 /* For XFmode constants, try to find a special 80387 instruction on
4310 those CPUs that benefit from them. */
4311 if (GET_MODE (x) == XFmode
4312 && x86_ext_80387_constants & TUNEMASK)
4317 if (! ext_80387_constants_init)
4318 init_ext_80387_constants ();
4320 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4321 for (i = 0; i < 5; i++)
4322 if (real_identical (&r, &ext_80387_constants_table[i]))
4329 /* Return the opcode of the special instruction to be used to load
4333 standard_80387_constant_opcode (x)
4336 switch (standard_80387_constant_p (x))
4356 /* Return the CONST_DOUBLE representing the 80387 constant that is
4357 loaded by the specified special instruction. The argument IDX
4358 matches the return value from standard_80387_constant_p. */
4361 standard_80387_constant_rtx (idx)
4366 if (! ext_80387_constants_init)
4367 init_ext_80387_constants ();
4383 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
4386 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4389 standard_sse_constant_p (x)
4392 if (x == const0_rtx)
4394 return (x == CONST0_RTX (GET_MODE (x)));
4397 /* Returns 1 if OP contains a symbol reference */
4400 symbolic_reference_mentioned_p (op)
4403 register const char *fmt;
4406 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4409 fmt = GET_RTX_FORMAT (GET_CODE (op));
4410 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4416 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4417 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4421 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4428 /* Return 1 if it is appropriate to emit `ret' instructions in the
4429 body of a function. Do this only if the epilogue is simple, needing a
4430 couple of insns. Prior to reloading, we can't tell how many registers
4431 must be saved, so return 0 then. Return 0 if there is no frame
4432 marker to de-allocate.
4434 If NON_SAVING_SETJMP is defined and true, then it is not possible
4435 for the epilogue to be simple, so return 0. This is a special case
4436 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4437 until final, but jump_optimize may need to know sooner if a
4441 ix86_can_use_return_insn_p ()
4443 struct ix86_frame frame;
4445 #ifdef NON_SAVING_SETJMP
4446 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4450 if (! reload_completed || frame_pointer_needed)
4453 /* Don't allow more than 32 pop, since that's all we can do
4454 with one instruction. */
4455 if (current_function_pops_args
4456 && current_function_args_size >= 32768)
4459 ix86_compute_frame_layout (&frame);
4460 return frame.to_allocate == 0 && frame.nregs == 0;
4463 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4465 x86_64_sign_extended_value (value)
4468 switch (GET_CODE (value))
4470 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4471 to be at least 32 and this all acceptable constants are
4472 represented as CONST_INT. */
4474 if (HOST_BITS_PER_WIDE_INT == 32)
4478 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4479 return trunc_int_for_mode (val, SImode) == val;
4483 /* For certain code models, the symbolic references are known to fit.
4484 in CM_SMALL_PIC model we know it fits if it is local to the shared
4485 library. Don't count TLS SYMBOL_REFs here, since they should fit
4486 only if inside of UNSPEC handled below. */
4488 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4490 /* For certain code models, the code is near as well. */
4492 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4493 || ix86_cmodel == CM_KERNEL);
4495 /* We also may accept the offsetted memory references in certain special
4498 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4499 switch (XINT (XEXP (value, 0), 1))
4501 case UNSPEC_GOTPCREL:
4503 case UNSPEC_GOTNTPOFF:
4509 if (GET_CODE (XEXP (value, 0)) == PLUS)
4511 rtx op1 = XEXP (XEXP (value, 0), 0);
4512 rtx op2 = XEXP (XEXP (value, 0), 1);
4513 HOST_WIDE_INT offset;
4515 if (ix86_cmodel == CM_LARGE)
4517 if (GET_CODE (op2) != CONST_INT)
4519 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4520 switch (GET_CODE (op1))
4523 /* For CM_SMALL assume that latest object is 16MB before
4524 end of 31bits boundary. We may also accept pretty
4525 large negative constants knowing that all objects are
4526 in the positive half of address space. */
4527 if (ix86_cmodel == CM_SMALL
4528 && offset < 16*1024*1024
4529 && trunc_int_for_mode (offset, SImode) == offset)
4531 /* For CM_KERNEL we know that all object resist in the
4532 negative half of 32bits address space. We may not
4533 accept negative offsets, since they may be just off
4534 and we may accept pretty large positive ones. */
4535 if (ix86_cmodel == CM_KERNEL
4537 && trunc_int_for_mode (offset, SImode) == offset)
4541 /* These conditions are similar to SYMBOL_REF ones, just the
4542 constraints for code models differ. */
4543 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4544 && offset < 16*1024*1024
4545 && trunc_int_for_mode (offset, SImode) == offset)
4547 if (ix86_cmodel == CM_KERNEL
4549 && trunc_int_for_mode (offset, SImode) == offset)
4553 switch (XINT (op1, 1))
4558 && trunc_int_for_mode (offset, SImode) == offset)
4572 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4574 x86_64_zero_extended_value (value)
4577 switch (GET_CODE (value))
4580 if (HOST_BITS_PER_WIDE_INT == 32)
4581 return (GET_MODE (value) == VOIDmode
4582 && !CONST_DOUBLE_HIGH (value));
4586 if (HOST_BITS_PER_WIDE_INT == 32)
4587 return INTVAL (value) >= 0;
4589 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4592 /* For certain code models, the symbolic references are known to fit. */
4594 return ix86_cmodel == CM_SMALL;
4596 /* For certain code models, the code is near as well. */
4598 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4600 /* We also may accept the offsetted memory references in certain special
4603 if (GET_CODE (XEXP (value, 0)) == PLUS)
4605 rtx op1 = XEXP (XEXP (value, 0), 0);
4606 rtx op2 = XEXP (XEXP (value, 0), 1);
4608 if (ix86_cmodel == CM_LARGE)
4610 switch (GET_CODE (op1))
4614 /* For small code model we may accept pretty large positive
4615 offsets, since one bit is available for free. Negative
4616 offsets are limited by the size of NULL pointer area
4617 specified by the ABI. */
4618 if (ix86_cmodel == CM_SMALL
4619 && GET_CODE (op2) == CONST_INT
4620 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4621 && (trunc_int_for_mode (INTVAL (op2), SImode)
4624 /* ??? For the kernel, we may accept adjustment of
4625 -0x10000000, since we know that it will just convert
4626 negative address space to positive, but perhaps this
4627 is not worthwhile. */
4630 /* These conditions are similar to SYMBOL_REF ones, just the
4631 constraints for code models differ. */
4632 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4633 && GET_CODE (op2) == CONST_INT
4634 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4635 && (trunc_int_for_mode (INTVAL (op2), SImode)
4649 /* Value should be nonzero if functions must have frame pointers.
4650 Zero means the frame pointer need not be set up (and parms may
4651 be accessed via the stack pointer) in functions that seem suitable. */
4654 ix86_frame_pointer_required ()
4656 /* If we accessed previous frames, then the generated code expects
4657 to be able to access the saved ebp value in our frame. */
4658 if (cfun->machine->accesses_prev_frame)
4661 /* Several x86 os'es need a frame pointer for other reasons,
4662 usually pertaining to setjmp. */
4663 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4666 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4667 the frame pointer by default. Turn it back on now if we've not
4668 got a leaf function. */
4669 if (TARGET_OMIT_LEAF_FRAME_POINTER
4670 && (!current_function_is_leaf))
4673 if (current_function_profile)
4679 /* Record that the current function accesses previous call frames. */
4682 ix86_setup_frame_addresses ()
4684 cfun->machine->accesses_prev_frame = 1;
4687 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4688 # define USE_HIDDEN_LINKONCE 1
4690 # define USE_HIDDEN_LINKONCE 0
4693 static int pic_labels_used;
4695 /* Fills in the label name that should be used for a pc thunk for
4696 the given register. */
4699 get_pc_thunk_name (name, regno)
4703 if (USE_HIDDEN_LINKONCE)
4704 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4706 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4710 /* This function generates code for -fpic that loads %ebx with
4711 the return address of the caller and then returns. */
4719 for (regno = 0; regno < 8; ++regno)
4723 if (! ((pic_labels_used >> regno) & 1))
4726 get_pc_thunk_name (name, regno);
4728 if (USE_HIDDEN_LINKONCE)
4732 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4734 TREE_PUBLIC (decl) = 1;
4735 TREE_STATIC (decl) = 1;
4736 DECL_ONE_ONLY (decl) = 1;
4738 (*targetm.asm_out.unique_section) (decl, 0);
4739 named_section (decl, NULL, 0);
4741 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4742 fputs ("\t.hidden\t", asm_out_file);
4743 assemble_name (asm_out_file, name);
4744 fputc ('\n', asm_out_file);
4745 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4750 ASM_OUTPUT_LABEL (asm_out_file, name);
4753 xops[0] = gen_rtx_REG (SImode, regno);
4754 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4755 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4756 output_asm_insn ("ret", xops);
4759 if (NEED_INDICATE_EXEC_STACK)
4760 file_end_indicate_exec_stack ();
4763 /* Emit code for the SET_GOT patterns. */
4766 output_set_got (dest)
4772 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4774 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4776 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4779 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4781 output_asm_insn ("call\t%a2", xops);
4784 /* Output the "canonical" label name ("Lxx$pb") here too. This
4785 is what will be referred to by the Mach-O PIC subsystem. */
4786 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4788 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4789 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4792 output_asm_insn ("pop{l}\t%0", xops);
4797 get_pc_thunk_name (name, REGNO (dest));
4798 pic_labels_used |= 1 << REGNO (dest);
4800 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4801 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4802 output_asm_insn ("call\t%X2", xops);
4805 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4806 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4807 else if (!TARGET_MACHO)
4808 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4813 /* Generate an "push" pattern for input ARG. */
4819 return gen_rtx_SET (VOIDmode,
4821 gen_rtx_PRE_DEC (Pmode,
4822 stack_pointer_rtx)),
4826 /* Return >= 0 if there is an unused call-clobbered register available
4827 for the entire function. */
4830 ix86_select_alt_pic_regnum ()
4832 if (current_function_is_leaf && !current_function_profile)
4835 for (i = 2; i >= 0; --i)
4836 if (!regs_ever_live[i])
4840 return INVALID_REGNUM;
4843 /* Return 1 if we need to save REGNO. */
4845 ix86_save_reg (regno, maybe_eh_return)
4847 int maybe_eh_return;
4849 if (pic_offset_table_rtx
4850 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4851 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4852 || current_function_profile
4853 || current_function_calls_eh_return
4854 || current_function_uses_const_pool))
4856 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4861 if (current_function_calls_eh_return && maybe_eh_return)
4866 unsigned test = EH_RETURN_DATA_REGNO (i);
4867 if (test == INVALID_REGNUM)
4874 return (regs_ever_live[regno]
4875 && !call_used_regs[regno]
4876 && !fixed_regs[regno]
4877 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4880 /* Return number of registers to be saved on the stack. */
4888 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4889 if (ix86_save_reg (regno, true))
4894 /* Return the offset between two registers, one to be eliminated, and the other
4895 its replacement, at the start of a routine. */
4898 ix86_initial_elimination_offset (from, to)
4902 struct ix86_frame frame;
4903 ix86_compute_frame_layout (&frame);
4905 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4906 return frame.hard_frame_pointer_offset;
4907 else if (from == FRAME_POINTER_REGNUM
4908 && to == HARD_FRAME_POINTER_REGNUM)
4909 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4912 if (to != STACK_POINTER_REGNUM)
4914 else if (from == ARG_POINTER_REGNUM)
4915 return frame.stack_pointer_offset;
4916 else if (from != FRAME_POINTER_REGNUM)
4919 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4923 /* Fill structure ix86_frame about frame of currently computed function. */
4926 ix86_compute_frame_layout (frame)
4927 struct ix86_frame *frame;
4929 HOST_WIDE_INT total_size;
4930 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4932 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4933 HOST_WIDE_INT size = get_frame_size ();
4935 frame->nregs = ix86_nsaved_regs ();
4938 /* During reload iteration the amount of registers saved can change.
4939 Recompute the value as needed. Do not recompute when amount of registers
4940 didn't change as reload does mutiple calls to the function and does not
4941 expect the decision to change within single iteration. */
4943 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4945 int count = frame->nregs;
4947 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4948 /* The fast prologue uses move instead of push to save registers. This
4949 is significantly longer, but also executes faster as modern hardware
4950 can execute the moves in parallel, but can't do that for push/pop.
4952 Be careful about choosing what prologue to emit: When function takes
4953 many instructions to execute we may use slow version as well as in
4954 case function is known to be outside hot spot (this is known with
4955 feedback only). Weight the size of function by number of registers
4956 to save as it is cheap to use one or two push instructions but very
4957 slow to use many of them. */
4959 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4960 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4961 || (flag_branch_probabilities
4962 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4963 cfun->machine->use_fast_prologue_epilogue = false;
4965 cfun->machine->use_fast_prologue_epilogue
4966 = !expensive_function_p (count);
4968 if (TARGET_PROLOGUE_USING_MOVE
4969 && cfun->machine->use_fast_prologue_epilogue)
4970 frame->save_regs_using_mov = true;
4972 frame->save_regs_using_mov = false;
4975 /* Skip return address and saved base pointer. */
4976 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4978 frame->hard_frame_pointer_offset = offset;
4980 /* Do some sanity checking of stack_alignment_needed and
4981 preferred_alignment, since i386 port is the only using those features
4982 that may break easily. */
4984 if (size && !stack_alignment_needed)
4986 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4988 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4990 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4993 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4994 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4996 /* Register save area */
4997 offset += frame->nregs * UNITS_PER_WORD;
5000 if (ix86_save_varrargs_registers)
5002 offset += X86_64_VARARGS_SIZE;
5003 frame->va_arg_size = X86_64_VARARGS_SIZE;
5006 frame->va_arg_size = 0;
5008 /* Align start of frame for local function. */
5009 frame->padding1 = ((offset + stack_alignment_needed - 1)
5010 & -stack_alignment_needed) - offset;
5012 offset += frame->padding1;
5014 /* Frame pointer points here. */
5015 frame->frame_pointer_offset = offset;
5019 /* Add outgoing arguments area. Can be skipped if we eliminated
5020 all the function calls as dead code. */
5021 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
5023 offset += current_function_outgoing_args_size;
5024 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5027 frame->outgoing_arguments_size = 0;
5029 /* Align stack boundary. Only needed if we're calling another function
5031 if (!current_function_is_leaf || current_function_calls_alloca)
5032 frame->padding2 = ((offset + preferred_alignment - 1)
5033 & -preferred_alignment) - offset;
5035 frame->padding2 = 0;
5037 offset += frame->padding2;
5039 /* We've reached end of stack frame. */
5040 frame->stack_pointer_offset = offset;
5042 /* Size prologue needs to allocate. */
5043 frame->to_allocate =
5044 (size + frame->padding1 + frame->padding2
5045 + frame->outgoing_arguments_size + frame->va_arg_size);
5047 if (!frame->to_allocate && frame->nregs <= 1)
5048 frame->save_regs_using_mov = false;
5050 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5051 && current_function_is_leaf)
5053 frame->red_zone_size = frame->to_allocate;
5054 if (frame->save_regs_using_mov)
5055 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5056 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5057 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5060 frame->red_zone_size = 0;
5061 frame->to_allocate -= frame->red_zone_size;
5062 frame->stack_pointer_offset -= frame->red_zone_size;
5064 fprintf (stderr, "nregs: %i\n", frame->nregs);
5065 fprintf (stderr, "size: %i\n", size);
5066 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5067 fprintf (stderr, "padding1: %i\n", frame->padding1);
5068 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5069 fprintf (stderr, "padding2: %i\n", frame->padding2);
5070 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5071 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5072 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5073 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5074 frame->hard_frame_pointer_offset);
5075 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5079 /* Emit code to save registers in the prologue. */
5082 ix86_emit_save_regs ()
5087 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5088 if (ix86_save_reg (regno, true))
5090 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5091 RTX_FRAME_RELATED_P (insn) = 1;
5095 /* Emit code to save registers using MOV insns. First register
5096 is restored from POINTER + OFFSET. */
5098 ix86_emit_save_regs_using_mov (pointer, offset)
5100 HOST_WIDE_INT offset;
5105 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5106 if (ix86_save_reg (regno, true))
5108 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5110 gen_rtx_REG (Pmode, regno));
5111 RTX_FRAME_RELATED_P (insn) = 1;
5112 offset += UNITS_PER_WORD;
5116 /* Expand the prologue into a bunch of separate insns. */
5119 ix86_expand_prologue ()
5123 struct ix86_frame frame;
5124 HOST_WIDE_INT allocate;
5126 ix86_compute_frame_layout (&frame);
5128 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5129 slower on all targets. Also sdb doesn't like it. */
5131 if (frame_pointer_needed)
5133 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5134 RTX_FRAME_RELATED_P (insn) = 1;
5136 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5137 RTX_FRAME_RELATED_P (insn) = 1;
5140 allocate = frame.to_allocate;
5142 if (!frame.save_regs_using_mov)
5143 ix86_emit_save_regs ();
5145 allocate += frame.nregs * UNITS_PER_WORD;
5147 /* When using red zone we may start register saving before allocating
5148 the stack frame saving one cycle of the prologue. */
5149 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5150 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5151 : stack_pointer_rtx,
5152 -frame.nregs * UNITS_PER_WORD);
5156 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5158 insn = emit_insn (gen_pro_epilogue_adjust_stack
5159 (stack_pointer_rtx, stack_pointer_rtx,
5160 GEN_INT (-allocate)));
5161 RTX_FRAME_RELATED_P (insn) = 1;
5165 /* ??? Is this only valid for Win32? */
5172 arg0 = gen_rtx_REG (SImode, 0);
5173 emit_move_insn (arg0, GEN_INT (allocate));
5175 sym = gen_rtx_MEM (FUNCTION_MODE,
5176 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5177 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5179 CALL_INSN_FUNCTION_USAGE (insn)
5180 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5181 CALL_INSN_FUNCTION_USAGE (insn));
5183 /* Don't allow scheduling pass to move insns across __alloca
5185 emit_insn (gen_blockage (const0_rtx));
5187 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5189 if (!frame_pointer_needed || !frame.to_allocate)
5190 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5192 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5193 -frame.nregs * UNITS_PER_WORD);
5196 pic_reg_used = false;
5197 if (pic_offset_table_rtx
5198 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5199 || current_function_profile))
5201 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5203 if (alt_pic_reg_used != INVALID_REGNUM)
5204 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5206 pic_reg_used = true;
5211 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5213 /* Even with accurate pre-reload life analysis, we can wind up
5214 deleting all references to the pic register after reload.
5215 Consider if cross-jumping unifies two sides of a branch
5216 controlled by a comparison vs the only read from a global.
5217 In which case, allow the set_got to be deleted, though we're
5218 too late to do anything about the ebx save in the prologue. */
5219 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5222 /* Prevent function calls from be scheduled before the call to mcount.
5223 In the pic_reg_used case, make sure that the got load isn't deleted. */
5224 if (current_function_profile)
5225 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5228 /* Emit code to restore saved registers using MOV insns. First register
5229 is restored from POINTER + OFFSET. */
5231 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5234 int maybe_eh_return;
5238 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5239 if (ix86_save_reg (regno, maybe_eh_return))
5241 emit_move_insn (gen_rtx_REG (Pmode, regno),
5242 adjust_address (gen_rtx_MEM (Pmode, pointer),
5244 offset += UNITS_PER_WORD;
5248 /* Restore function stack, frame, and registers. */
5251 ix86_expand_epilogue (style)
5255 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5256 struct ix86_frame frame;
5257 HOST_WIDE_INT offset;
5259 ix86_compute_frame_layout (&frame);
5261 /* Calculate start of saved registers relative to ebp. Special care
5262 must be taken for the normal return case of a function using
5263 eh_return: the eax and edx registers are marked as saved, but not
5264 restored along this path. */
5265 offset = frame.nregs;
5266 if (current_function_calls_eh_return && style != 2)
5268 offset *= -UNITS_PER_WORD;
5270 /* If we're only restoring one register and sp is not valid then
5271 using a move instruction to restore the register since it's
5272 less work than reloading sp and popping the register.
5274 The default code result in stack adjustment using add/lea instruction,
5275 while this code results in LEAVE instruction (or discrete equivalent),
5276 so it is profitable in some other cases as well. Especially when there
5277 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5278 and there is exactly one register to pop. This heuristic may need some
5279 tuning in future. */
5280 if ((!sp_valid && frame.nregs <= 1)
5281 || (TARGET_EPILOGUE_USING_MOVE
5282 && cfun->machine->use_fast_prologue_epilogue
5283 && (frame.nregs > 1 || frame.to_allocate))
5284 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5285 || (frame_pointer_needed && TARGET_USE_LEAVE
5286 && cfun->machine->use_fast_prologue_epilogue
5287 && frame.nregs == 1)
5288 || current_function_calls_eh_return)
5290 /* Restore registers. We can use ebp or esp to address the memory
5291 locations. If both are available, default to ebp, since offsets
5292 are known to be small. Only exception is esp pointing directly to the
5293 end of block of saved registers, where we may simplify addressing
5296 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5297 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5298 frame.to_allocate, style == 2);
5300 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5301 offset, style == 2);
5303 /* eh_return epilogues need %ecx added to the stack pointer. */
5306 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5308 if (frame_pointer_needed)
5310 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5311 tmp = plus_constant (tmp, UNITS_PER_WORD);
5312 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5314 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5315 emit_move_insn (hard_frame_pointer_rtx, tmp);
5317 emit_insn (gen_pro_epilogue_adjust_stack
5318 (stack_pointer_rtx, sa, const0_rtx));
5322 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5323 tmp = plus_constant (tmp, (frame.to_allocate
5324 + frame.nregs * UNITS_PER_WORD));
5325 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5328 else if (!frame_pointer_needed)
5329 emit_insn (gen_pro_epilogue_adjust_stack
5330 (stack_pointer_rtx, stack_pointer_rtx,
5331 GEN_INT (frame.to_allocate
5332 + frame.nregs * UNITS_PER_WORD)));
5333 /* If not an i386, mov & pop is faster than "leave". */
5334 else if (TARGET_USE_LEAVE || optimize_size
5335 || !cfun->machine->use_fast_prologue_epilogue)
5336 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5339 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5340 hard_frame_pointer_rtx,
5343 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5345 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5350 /* First step is to deallocate the stack frame so that we can
5351 pop the registers. */
5354 if (!frame_pointer_needed)
5356 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5357 hard_frame_pointer_rtx,
5360 else if (frame.to_allocate)
5361 emit_insn (gen_pro_epilogue_adjust_stack
5362 (stack_pointer_rtx, stack_pointer_rtx,
5363 GEN_INT (frame.to_allocate)));
5365 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5366 if (ix86_save_reg (regno, false))
5369 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5371 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5373 if (frame_pointer_needed)
5375 /* Leave results in shorter dependency chains on CPUs that are
5376 able to grok it fast. */
5377 if (TARGET_USE_LEAVE)
5378 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5379 else if (TARGET_64BIT)
5380 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5382 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5386 /* Sibcall epilogues don't want a return instruction. */
5390 if (current_function_pops_args && current_function_args_size)
5392 rtx popc = GEN_INT (current_function_pops_args);
5394 /* i386 can only pop 64K bytes. If asked to pop more, pop
5395 return address, do explicit add, and jump indirectly to the
5398 if (current_function_pops_args >= 65536)
5400 rtx ecx = gen_rtx_REG (SImode, 2);
5402 /* There are is no "pascal" calling convention in 64bit ABI. */
5406 emit_insn (gen_popsi1 (ecx));
5407 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5408 emit_jump_insn (gen_return_indirect_internal (ecx));
5411 emit_jump_insn (gen_return_pop_internal (popc));
5414 emit_jump_insn (gen_return_internal ());
5417 /* Reset from the function's potential modifications. */
5420 ix86_output_function_epilogue (file, size)
5421 FILE *file ATTRIBUTE_UNUSED;
5422 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5424 if (pic_offset_table_rtx)
5425 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5428 /* Extract the parts of an RTL expression that is a valid memory address
5429 for an instruction. Return 0 if the structure of the address is
5430 grossly off. Return -1 if the address contains ASHIFT, so it is not
5431 strictly valid, but still used for computing length of lea instruction. */
5434 ix86_decompose_address (addr, out)
5436 struct ix86_address *out;
5438 rtx base = NULL_RTX;
5439 rtx index = NULL_RTX;
5440 rtx disp = NULL_RTX;
5441 HOST_WIDE_INT scale = 1;
5442 rtx scale_rtx = NULL_RTX;
5444 enum ix86_address_seg seg = SEG_DEFAULT;
5446 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5448 else if (GET_CODE (addr) == PLUS)
5458 addends[n++] = XEXP (op, 1);
5461 while (GET_CODE (op) == PLUS);
5466 for (i = n; i >= 0; --i)
5469 switch (GET_CODE (op))
5474 index = XEXP (op, 0);
5475 scale_rtx = XEXP (op, 1);
5479 if (XINT (op, 1) == UNSPEC_TP
5480 && TARGET_TLS_DIRECT_SEG_REFS
5481 && seg == SEG_DEFAULT)
5482 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5511 else if (GET_CODE (addr) == MULT)
5513 index = XEXP (addr, 0); /* index*scale */
5514 scale_rtx = XEXP (addr, 1);
5516 else if (GET_CODE (addr) == ASHIFT)
5520 /* We're called for lea too, which implements ashift on occasion. */
5521 index = XEXP (addr, 0);
5522 tmp = XEXP (addr, 1);
5523 if (GET_CODE (tmp) != CONST_INT)
5525 scale = INTVAL (tmp);
5526 if ((unsigned HOST_WIDE_INT) scale > 3)
5532 disp = addr; /* displacement */
5534 /* Extract the integral value of scale. */
5537 if (GET_CODE (scale_rtx) != CONST_INT)
5539 scale = INTVAL (scale_rtx);
5542 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5543 if (base && index && scale == 1
5544 && (index == arg_pointer_rtx
5545 || index == frame_pointer_rtx
5546 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5553 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5554 if ((base == hard_frame_pointer_rtx
5555 || base == frame_pointer_rtx
5556 || base == arg_pointer_rtx) && !disp)
5559 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5560 Avoid this by transforming to [%esi+0]. */
5561 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5562 && base && !index && !disp
5564 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5567 /* Special case: encode reg+reg instead of reg*2. */
5568 if (!base && index && scale && scale == 2)
5569 base = index, scale = 1;
5571 /* Special case: scaling cannot be encoded without base or displacement. */
5572 if (!base && !disp && index && scale != 1)
5584 /* Return cost of the memory address x.
5585 For i386, it is better to use a complex address than let gcc copy
5586 the address into a reg and make a new pseudo. But not if the address
5587 requires to two regs - that would mean more pseudos with longer
5590 ix86_address_cost (x)
5593 struct ix86_address parts;
5596 if (!ix86_decompose_address (x, &parts))
5599 if (parts.base && GET_CODE (parts.base) == SUBREG)
5600 parts.base = SUBREG_REG (parts.base);
5601 if (parts.index && GET_CODE (parts.index) == SUBREG)
5602 parts.index = SUBREG_REG (parts.index);
5604 /* More complex memory references are better. */
5605 if (parts.disp && parts.disp != const0_rtx)
5607 if (parts.seg != SEG_DEFAULT)
5610 /* Attempt to minimize number of registers in the address. */
5612 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5614 && (!REG_P (parts.index)
5615 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5619 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5621 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5622 && parts.base != parts.index)
5625 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5626 since it's predecode logic can't detect the length of instructions
5627 and it degenerates to vector decoded. Increase cost of such
5628 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5629 to split such addresses or even refuse such addresses at all.
5631 Following addressing modes are affected:
5636 The first and last case may be avoidable by explicitly coding the zero in
5637 memory address, but I don't have AMD-K6 machine handy to check this
5641 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5642 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5643 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5649 /* If X is a machine specific address (i.e. a symbol or label being
5650 referenced as a displacement from the GOT implemented using an
5651 UNSPEC), then return the base term. Otherwise return X. */
5654 ix86_find_base_term (x)
5661 if (GET_CODE (x) != CONST)
5664 if (GET_CODE (term) == PLUS
5665 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5666 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5667 term = XEXP (term, 0);
5668 if (GET_CODE (term) != UNSPEC
5669 || XINT (term, 1) != UNSPEC_GOTPCREL)
5672 term = XVECEXP (term, 0, 0);
5674 if (GET_CODE (term) != SYMBOL_REF
5675 && GET_CODE (term) != LABEL_REF)
5681 term = ix86_delegitimize_address (x);
5683 if (GET_CODE (term) != SYMBOL_REF
5684 && GET_CODE (term) != LABEL_REF)
5690 /* Determine if a given RTX is a valid constant. We already know this
5691 satisfies CONSTANT_P. */
5694 legitimate_constant_p (x)
5699 switch (GET_CODE (x))
5702 /* TLS symbols are not constant. */
5703 if (tls_symbolic_operand (x, Pmode))
5708 inner = XEXP (x, 0);
5710 /* Offsets of TLS symbols are never valid.
5711 Discourage CSE from creating them. */
5712 if (GET_CODE (inner) == PLUS
5713 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5716 if (GET_CODE (inner) == PLUS)
5718 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5720 inner = XEXP (inner, 0);
5723 /* Only some unspecs are valid as "constants". */
5724 if (GET_CODE (inner) == UNSPEC)
5725 switch (XINT (inner, 1))
5729 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5731 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5741 /* Otherwise we handle everything else in the move patterns. */
5745 /* Determine if it's legal to put X into the constant pool. This
5746 is not possible for the address of thread-local symbols, which
5747 is checked above. */
5750 ix86_cannot_force_const_mem (x)
5753 return !legitimate_constant_p (x);
5756 /* Determine if a given RTX is a valid constant address. */
5759 constant_address_p (x)
5762 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5765 /* Nonzero if the constant value X is a legitimate general operand
5766 when generating PIC code. It is given that flag_pic is on and
5767 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5770 legitimate_pic_operand_p (x)
5775 switch (GET_CODE (x))
5778 inner = XEXP (x, 0);
5780 /* Only some unspecs are valid as "constants". */
5781 if (GET_CODE (inner) == UNSPEC)
5782 switch (XINT (inner, 1))
5785 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5793 return legitimate_pic_address_disp_p (x);
5800 /* Determine if a given CONST RTX is a valid memory displacement
5804 legitimate_pic_address_disp_p (disp)
5809 /* In 64bit mode we can allow direct addresses of symbols and labels
5810 when they are not dynamic symbols. */
5813 /* TLS references should always be enclosed in UNSPEC. */
5814 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5816 if (GET_CODE (disp) == SYMBOL_REF
5817 && ix86_cmodel == CM_SMALL_PIC
5818 && SYMBOL_REF_LOCAL_P (disp))
5820 if (GET_CODE (disp) == LABEL_REF)
5822 if (GET_CODE (disp) == CONST
5823 && GET_CODE (XEXP (disp, 0)) == PLUS
5824 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5825 && ix86_cmodel == CM_SMALL_PIC
5826 && SYMBOL_REF_LOCAL_P (XEXP (XEXP (disp, 0), 0)))
5827 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5828 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5829 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5830 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5833 if (GET_CODE (disp) != CONST)
5835 disp = XEXP (disp, 0);
5839 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5840 of GOT tables. We should not need these anyway. */
5841 if (GET_CODE (disp) != UNSPEC
5842 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5845 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5846 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5852 if (GET_CODE (disp) == PLUS)
5854 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5856 disp = XEXP (disp, 0);
5860 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5861 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5863 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5864 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5865 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5867 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5868 if (strstr (sym_name, "$pb") != 0)
5873 if (GET_CODE (disp) != UNSPEC)
5876 switch (XINT (disp, 1))
5881 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5883 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5884 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5885 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5887 case UNSPEC_GOTTPOFF:
5888 case UNSPEC_GOTNTPOFF:
5889 case UNSPEC_INDNTPOFF:
5892 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5894 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5896 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5902 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5903 memory address for an instruction. The MODE argument is the machine mode
5904 for the MEM expression that wants to use this address.
5906 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5907 convert common non-canonical forms to canonical form so that they will
5911 legitimate_address_p (mode, addr, strict)
5912 enum machine_mode mode;
5916 struct ix86_address parts;
5917 rtx base, index, disp;
5918 HOST_WIDE_INT scale;
5919 const char *reason = NULL;
5920 rtx reason_rtx = NULL_RTX;
5922 if (TARGET_DEBUG_ADDR)
5925 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5926 GET_MODE_NAME (mode), strict);
5930 if (ix86_decompose_address (addr, &parts) <= 0)
5932 reason = "decomposition failed";
5937 index = parts.index;
5939 scale = parts.scale;
5941 /* Validate base register.
5943 Don't allow SUBREG's here, it can lead to spill failures when the base
5944 is one word out of a two word structure, which is represented internally
5952 if (GET_CODE (base) == SUBREG)
5953 reg = SUBREG_REG (base);
5957 if (GET_CODE (reg) != REG)
5959 reason = "base is not a register";
5963 if (GET_MODE (base) != Pmode)
5965 reason = "base is not in Pmode";
5969 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5970 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5972 reason = "base is not valid";
5977 /* Validate index register.
5979 Don't allow SUBREG's here, it can lead to spill failures when the index
5980 is one word out of a two word structure, which is represented internally
5988 if (GET_CODE (index) == SUBREG)
5989 reg = SUBREG_REG (index);
5993 if (GET_CODE (reg) != REG)
5995 reason = "index is not a register";
5999 if (GET_MODE (index) != Pmode)
6001 reason = "index is not in Pmode";
6005 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6006 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6008 reason = "index is not valid";
6013 /* Validate scale factor. */
6016 reason_rtx = GEN_INT (scale);
6019 reason = "scale without index";
6023 if (scale != 2 && scale != 4 && scale != 8)
6025 reason = "scale is not a valid multiplier";
6030 /* Validate displacement. */
6035 if (GET_CODE (disp) == CONST
6036 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6037 switch (XINT (XEXP (disp, 0), 1))
6041 case UNSPEC_GOTPCREL:
6044 goto is_legitimate_pic;
6046 case UNSPEC_GOTTPOFF:
6047 case UNSPEC_GOTNTPOFF:
6048 case UNSPEC_INDNTPOFF:
6054 reason = "invalid address unspec";
6058 else if (flag_pic && (SYMBOLIC_CONST (disp)
6060 && !machopic_operand_p (disp)
6065 if (TARGET_64BIT && (index || base))
6067 /* foo@dtpoff(%rX) is ok. */
6068 if (GET_CODE (disp) != CONST
6069 || GET_CODE (XEXP (disp, 0)) != PLUS
6070 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6071 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6072 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6073 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6075 reason = "non-constant pic memory reference";
6079 else if (! legitimate_pic_address_disp_p (disp))
6081 reason = "displacement is an invalid pic construct";
6085 /* This code used to verify that a symbolic pic displacement
6086 includes the pic_offset_table_rtx register.
6088 While this is good idea, unfortunately these constructs may
6089 be created by "adds using lea" optimization for incorrect
6098 This code is nonsensical, but results in addressing
6099 GOT table with pic_offset_table_rtx base. We can't
6100 just refuse it easily, since it gets matched by
6101 "addsi3" pattern, that later gets split to lea in the
6102 case output register differs from input. While this
6103 can be handled by separate addsi pattern for this case
6104 that never results in lea, this seems to be easier and
6105 correct fix for crash to disable this test. */
6107 else if (GET_CODE (disp) != LABEL_REF
6108 && GET_CODE (disp) != CONST_INT
6109 && (GET_CODE (disp) != CONST
6110 || !legitimate_constant_p (disp))
6111 && (GET_CODE (disp) != SYMBOL_REF
6112 || !legitimate_constant_p (disp)))
6114 reason = "displacement is not constant";
6117 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6119 reason = "displacement is out of range";
6124 /* Everything looks valid. */
6125 if (TARGET_DEBUG_ADDR)
6126 fprintf (stderr, "Success.\n");
6130 if (TARGET_DEBUG_ADDR)
6132 fprintf (stderr, "Error: %s\n", reason);
6133 debug_rtx (reason_rtx);
6138 /* Return an unique alias set for the GOT. */
6140 static HOST_WIDE_INT
6141 ix86_GOT_alias_set ()
6143 static HOST_WIDE_INT set = -1;
6145 set = new_alias_set ();
6149 /* Return a legitimate reference for ORIG (an address) using the
6150 register REG. If REG is 0, a new pseudo is generated.
6152 There are two types of references that must be handled:
6154 1. Global data references must load the address from the GOT, via
6155 the PIC reg. An insn is emitted to do this load, and the reg is
6158 2. Static data references, constant pool addresses, and code labels
6159 compute the address as an offset from the GOT, whose base is in
6160 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6161 differentiate them from global data objects. The returned
6162 address is the PIC reg + an unspec constant.
6164 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6165 reg also appears in the address. */
6168 legitimize_pic_address (orig, reg)
6178 reg = gen_reg_rtx (Pmode);
6179 /* Use the generic Mach-O PIC machinery. */
6180 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6183 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6185 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6187 /* This symbol may be referenced via a displacement from the PIC
6188 base address (@GOTOFF). */
6190 if (reload_in_progress)
6191 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6192 if (GET_CODE (addr) == CONST)
6193 addr = XEXP (addr, 0);
6194 if (GET_CODE (addr) == PLUS)
6196 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6197 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6200 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6201 new = gen_rtx_CONST (Pmode, new);
6202 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6206 emit_move_insn (reg, new);
6210 else if (GET_CODE (addr) == SYMBOL_REF)
6214 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6215 new = gen_rtx_CONST (Pmode, new);
6216 new = gen_rtx_MEM (Pmode, new);
6217 RTX_UNCHANGING_P (new) = 1;
6218 set_mem_alias_set (new, ix86_GOT_alias_set ());
6221 reg = gen_reg_rtx (Pmode);
6222 /* Use directly gen_movsi, otherwise the address is loaded
6223 into register for CSE. We don't want to CSE this addresses,
6224 instead we CSE addresses from the GOT table, so skip this. */
6225 emit_insn (gen_movsi (reg, new));
6230 /* This symbol must be referenced via a load from the
6231 Global Offset Table (@GOT). */
6233 if (reload_in_progress)
6234 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6235 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6236 new = gen_rtx_CONST (Pmode, new);
6237 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6238 new = gen_rtx_MEM (Pmode, new);
6239 RTX_UNCHANGING_P (new) = 1;
6240 set_mem_alias_set (new, ix86_GOT_alias_set ());
6243 reg = gen_reg_rtx (Pmode);
6244 emit_move_insn (reg, new);
6250 if (GET_CODE (addr) == CONST)
6252 addr = XEXP (addr, 0);
6254 /* We must match stuff we generate before. Assume the only
6255 unspecs that can get here are ours. Not that we could do
6256 anything with them anyway... */
6257 if (GET_CODE (addr) == UNSPEC
6258 || (GET_CODE (addr) == PLUS
6259 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6261 if (GET_CODE (addr) != PLUS)
6264 if (GET_CODE (addr) == PLUS)
6266 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6268 /* Check first to see if this is a constant offset from a @GOTOFF
6269 symbol reference. */
6270 if (local_symbolic_operand (op0, Pmode)
6271 && GET_CODE (op1) == CONST_INT)
6275 if (reload_in_progress)
6276 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6277 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6279 new = gen_rtx_PLUS (Pmode, new, op1);
6280 new = gen_rtx_CONST (Pmode, new);
6281 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6285 emit_move_insn (reg, new);
6291 if (INTVAL (op1) < -16*1024*1024
6292 || INTVAL (op1) >= 16*1024*1024)
6293 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6298 base = legitimize_pic_address (XEXP (addr, 0), reg);
6299 new = legitimize_pic_address (XEXP (addr, 1),
6300 base == reg ? NULL_RTX : reg);
6302 if (GET_CODE (new) == CONST_INT)
6303 new = plus_constant (base, INTVAL (new));
6306 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6308 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6309 new = XEXP (new, 1);
6311 new = gen_rtx_PLUS (Pmode, base, new);
6319 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6322 get_thread_pointer (to_reg)
6327 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6331 reg = gen_reg_rtx (Pmode);
6332 insn = gen_rtx_SET (VOIDmode, reg, tp);
6333 insn = emit_insn (insn);
6338 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6339 false if we expect this to be used for a memory address and true if
6340 we expect to load the address into a register. */
6343 legitimize_tls_address (x, model, for_mov)
6345 enum tls_model model;
6348 rtx dest, base, off, pic;
6353 case TLS_MODEL_GLOBAL_DYNAMIC:
6354 dest = gen_reg_rtx (Pmode);
6357 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6360 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6361 insns = get_insns ();
6364 emit_libcall_block (insns, dest, rax, x);
6367 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6370 case TLS_MODEL_LOCAL_DYNAMIC:
6371 base = gen_reg_rtx (Pmode);
6374 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6377 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6378 insns = get_insns ();
6381 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6382 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6383 emit_libcall_block (insns, base, rax, note);
6386 emit_insn (gen_tls_local_dynamic_base_32 (base));
6388 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6389 off = gen_rtx_CONST (Pmode, off);
6391 return gen_rtx_PLUS (Pmode, base, off);
6393 case TLS_MODEL_INITIAL_EXEC:
6397 type = UNSPEC_GOTNTPOFF;
6401 if (reload_in_progress)
6402 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6403 pic = pic_offset_table_rtx;
6404 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6406 else if (!TARGET_GNU_TLS)
6408 pic = gen_reg_rtx (Pmode);
6409 emit_insn (gen_set_got (pic));
6410 type = UNSPEC_GOTTPOFF;
6415 type = UNSPEC_INDNTPOFF;
6418 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6419 off = gen_rtx_CONST (Pmode, off);
6421 off = gen_rtx_PLUS (Pmode, pic, off);
6422 off = gen_rtx_MEM (Pmode, off);
6423 RTX_UNCHANGING_P (off) = 1;
6424 set_mem_alias_set (off, ix86_GOT_alias_set ());
6426 if (TARGET_64BIT || TARGET_GNU_TLS)
6428 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6429 off = force_reg (Pmode, off);
6430 return gen_rtx_PLUS (Pmode, base, off);
6434 base = get_thread_pointer (true);
6435 dest = gen_reg_rtx (Pmode);
6436 emit_insn (gen_subsi3 (dest, base, off));
6440 case TLS_MODEL_LOCAL_EXEC:
6441 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6442 (TARGET_64BIT || TARGET_GNU_TLS)
6443 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6444 off = gen_rtx_CONST (Pmode, off);
6446 if (TARGET_64BIT || TARGET_GNU_TLS)
6448 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6449 return gen_rtx_PLUS (Pmode, base, off);
6453 base = get_thread_pointer (true);
6454 dest = gen_reg_rtx (Pmode);
6455 emit_insn (gen_subsi3 (dest, base, off));
6466 /* Try machine-dependent ways of modifying an illegitimate address
6467 to be legitimate. If we find one, return the new, valid address.
6468 This macro is used in only one place: `memory_address' in explow.c.
6470 OLDX is the address as it was before break_out_memory_refs was called.
6471 In some cases it is useful to look at this to decide what needs to be done.
6473 MODE and WIN are passed so that this macro can use
6474 GO_IF_LEGITIMATE_ADDRESS.
6476 It is always safe for this macro to do nothing. It exists to recognize
6477 opportunities to optimize the output.
6479 For the 80386, we handle X+REG by loading X into a register R and
6480 using R+REG. R will go in a general reg and indexing will be used.
6481 However, if REG is a broken-out memory address or multiplication,
6482 nothing needs to be done because REG can certainly go in a general reg.
6484 When -fpic is used, special handling is needed for symbolic references.
6485 See comments by legitimize_pic_address in i386.c for details. */
6488 legitimize_address (x, oldx, mode)
6490 register rtx oldx ATTRIBUTE_UNUSED;
6491 enum machine_mode mode;
6496 if (TARGET_DEBUG_ADDR)
6498 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6499 GET_MODE_NAME (mode));
6503 log = tls_symbolic_operand (x, mode);
6505 return legitimize_tls_address (x, log, false);
6507 if (flag_pic && SYMBOLIC_CONST (x))
6508 return legitimize_pic_address (x, 0);
6510 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6511 if (GET_CODE (x) == ASHIFT
6512 && GET_CODE (XEXP (x, 1)) == CONST_INT
6513 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6516 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6517 GEN_INT (1 << log));
6520 if (GET_CODE (x) == PLUS)
6522 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6524 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6525 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6526 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6529 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6530 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6531 GEN_INT (1 << log));
6534 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6535 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6536 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6539 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6540 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6541 GEN_INT (1 << log));
6544 /* Put multiply first if it isn't already. */
6545 if (GET_CODE (XEXP (x, 1)) == MULT)
6547 rtx tmp = XEXP (x, 0);
6548 XEXP (x, 0) = XEXP (x, 1);
6553 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6554 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6555 created by virtual register instantiation, register elimination, and
6556 similar optimizations. */
6557 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6560 x = gen_rtx_PLUS (Pmode,
6561 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6562 XEXP (XEXP (x, 1), 0)),
6563 XEXP (XEXP (x, 1), 1));
6567 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6568 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6569 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6570 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6571 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6572 && CONSTANT_P (XEXP (x, 1)))
6575 rtx other = NULL_RTX;
6577 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6579 constant = XEXP (x, 1);
6580 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6582 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6584 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6585 other = XEXP (x, 1);
6593 x = gen_rtx_PLUS (Pmode,
6594 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6595 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6596 plus_constant (other, INTVAL (constant)));
6600 if (changed && legitimate_address_p (mode, x, FALSE))
6603 if (GET_CODE (XEXP (x, 0)) == MULT)
6606 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6609 if (GET_CODE (XEXP (x, 1)) == MULT)
6612 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6616 && GET_CODE (XEXP (x, 1)) == REG
6617 && GET_CODE (XEXP (x, 0)) == REG)
6620 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6623 x = legitimize_pic_address (x, 0);
6626 if (changed && legitimate_address_p (mode, x, FALSE))
6629 if (GET_CODE (XEXP (x, 0)) == REG)
6631 register rtx temp = gen_reg_rtx (Pmode);
6632 register rtx val = force_operand (XEXP (x, 1), temp);
6634 emit_move_insn (temp, val);
6640 else if (GET_CODE (XEXP (x, 1)) == REG)
6642 register rtx temp = gen_reg_rtx (Pmode);
6643 register rtx val = force_operand (XEXP (x, 0), temp);
6645 emit_move_insn (temp, val);
6655 /* Print an integer constant expression in assembler syntax. Addition
6656 and subtraction are the only arithmetic that may appear in these
6657 expressions. FILE is the stdio stream to write to, X is the rtx, and
6658 CODE is the operand print code from the output string. */
6661 output_pic_addr_const (file, x, code)
6668 switch (GET_CODE (x))
6678 assemble_name (file, XSTR (x, 0));
6679 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6680 fputs ("@PLT", file);
6687 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6688 assemble_name (asm_out_file, buf);
6692 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6696 /* This used to output parentheses around the expression,
6697 but that does not work on the 386 (either ATT or BSD assembler). */
6698 output_pic_addr_const (file, XEXP (x, 0), code);
6702 if (GET_MODE (x) == VOIDmode)
6704 /* We can use %d if the number is <32 bits and positive. */
6705 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6706 fprintf (file, "0x%lx%08lx",
6707 (unsigned long) CONST_DOUBLE_HIGH (x),
6708 (unsigned long) CONST_DOUBLE_LOW (x));
6710 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6713 /* We can't handle floating point constants;
6714 PRINT_OPERAND must handle them. */
6715 output_operand_lossage ("floating constant misused");
6719 /* Some assemblers need integer constants to appear first. */
6720 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6722 output_pic_addr_const (file, XEXP (x, 0), code);
6724 output_pic_addr_const (file, XEXP (x, 1), code);
6726 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6728 output_pic_addr_const (file, XEXP (x, 1), code);
6730 output_pic_addr_const (file, XEXP (x, 0), code);
6738 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6739 output_pic_addr_const (file, XEXP (x, 0), code);
6741 output_pic_addr_const (file, XEXP (x, 1), code);
6743 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6747 if (XVECLEN (x, 0) != 1)
6749 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6750 switch (XINT (x, 1))
6753 fputs ("@GOT", file);
6756 fputs ("@GOTOFF", file);
6758 case UNSPEC_GOTPCREL:
6759 fputs ("@GOTPCREL(%rip)", file);
6761 case UNSPEC_GOTTPOFF:
6762 /* FIXME: This might be @TPOFF in Sun ld too. */
6763 fputs ("@GOTTPOFF", file);
6766 fputs ("@TPOFF", file);
6770 fputs ("@TPOFF", file);
6772 fputs ("@NTPOFF", file);
6775 fputs ("@DTPOFF", file);
6777 case UNSPEC_GOTNTPOFF:
6779 fputs ("@GOTTPOFF(%rip)", file);
6781 fputs ("@GOTNTPOFF", file);
6783 case UNSPEC_INDNTPOFF:
6784 fputs ("@INDNTPOFF", file);
6787 output_operand_lossage ("invalid UNSPEC as operand");
6793 output_operand_lossage ("invalid expression as operand");
6797 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6798 We need to handle our special PIC relocations. */
6801 i386_dwarf_output_addr_const (file, x)
6806 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6810 fprintf (file, "%s", ASM_LONG);
6813 output_pic_addr_const (file, x, '\0');
6815 output_addr_const (file, x);
6819 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6820 We need to emit DTP-relative relocations. */
6823 i386_output_dwarf_dtprel (file, size, x)
6828 fputs (ASM_LONG, file);
6829 output_addr_const (file, x);
6830 fputs ("@DTPOFF", file);
6836 fputs (", 0", file);
6843 /* In the name of slightly smaller debug output, and to cater to
6844 general assembler losage, recognize PIC+GOTOFF and turn it back
6845 into a direct symbol reference. */
6848 ix86_delegitimize_address (orig_x)
6853 if (GET_CODE (x) == MEM)
6858 if (GET_CODE (x) != CONST
6859 || GET_CODE (XEXP (x, 0)) != UNSPEC
6860 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6861 || GET_CODE (orig_x) != MEM)
6863 return XVECEXP (XEXP (x, 0), 0, 0);
6866 if (GET_CODE (x) != PLUS
6867 || GET_CODE (XEXP (x, 1)) != CONST)
6870 if (GET_CODE (XEXP (x, 0)) == REG
6871 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6872 /* %ebx + GOT/GOTOFF */
6874 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6876 /* %ebx + %reg * scale + GOT/GOTOFF */
6878 if (GET_CODE (XEXP (y, 0)) == REG
6879 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6881 else if (GET_CODE (XEXP (y, 1)) == REG
6882 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6886 if (GET_CODE (y) != REG
6887 && GET_CODE (y) != MULT
6888 && GET_CODE (y) != ASHIFT)
6894 x = XEXP (XEXP (x, 1), 0);
6895 if (GET_CODE (x) == UNSPEC
6896 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6897 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6900 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6901 return XVECEXP (x, 0, 0);
6904 if (GET_CODE (x) == PLUS
6905 && GET_CODE (XEXP (x, 0)) == UNSPEC
6906 && GET_CODE (XEXP (x, 1)) == CONST_INT
6907 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6908 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6909 && GET_CODE (orig_x) != MEM)))
6911 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6913 return gen_rtx_PLUS (Pmode, y, x);
6921 put_condition_code (code, mode, reverse, fp, file)
6923 enum machine_mode mode;
6929 if (mode == CCFPmode || mode == CCFPUmode)
6931 enum rtx_code second_code, bypass_code;
6932 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6933 if (bypass_code != NIL || second_code != NIL)
6935 code = ix86_fp_compare_code_to_integer (code);
6939 code = reverse_condition (code);
6950 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6955 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6956 Those same assemblers have the same but opposite losage on cmov. */
6959 suffix = fp ? "nbe" : "a";
6962 if (mode == CCNOmode || mode == CCGOCmode)
6964 else if (mode == CCmode || mode == CCGCmode)
6975 if (mode == CCNOmode || mode == CCGOCmode)
6977 else if (mode == CCmode || mode == CCGCmode)
6986 suffix = fp ? "nb" : "ae";
6989 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6999 suffix = fp ? "u" : "p";
7002 suffix = fp ? "nu" : "np";
7007 fputs (suffix, file);
7011 print_reg (x, code, file)
7016 if (REGNO (x) == ARG_POINTER_REGNUM
7017 || REGNO (x) == FRAME_POINTER_REGNUM
7018 || REGNO (x) == FLAGS_REG
7019 || REGNO (x) == FPSR_REG)
7022 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7025 if (code == 'w' || MMX_REG_P (x))
7027 else if (code == 'b')
7029 else if (code == 'k')
7031 else if (code == 'q')
7033 else if (code == 'y')
7035 else if (code == 'h')
7038 code = GET_MODE_SIZE (GET_MODE (x));
7040 /* Irritatingly, AMD extended registers use different naming convention
7041 from the normal registers. */
7042 if (REX_INT_REG_P (x))
7049 error ("extended registers have no high halves");
7052 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7055 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7058 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7061 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7064 error ("unsupported operand size for extended register");
7072 if (STACK_TOP_P (x))
7074 fputs ("st(0)", file);
7081 if (! ANY_FP_REG_P (x))
7082 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7086 fputs (hi_reg_name[REGNO (x)], file);
7089 fputs (qi_reg_name[REGNO (x)], file);
7092 fputs (qi_high_reg_name[REGNO (x)], file);
7099 /* Locate some local-dynamic symbol still in use by this function
7100 so that we can print its name in some tls_local_dynamic_base
7104 get_some_local_dynamic_name ()
7108 if (cfun->machine->some_ld_name)
7109 return cfun->machine->some_ld_name;
7111 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7113 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7114 return cfun->machine->some_ld_name;
7120 get_some_local_dynamic_name_1 (px, data)
7122 void *data ATTRIBUTE_UNUSED;
7126 if (GET_CODE (x) == SYMBOL_REF
7127 && local_dynamic_symbolic_operand (x, Pmode))
7129 cfun->machine->some_ld_name = XSTR (x, 0);
7137 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7138 C -- print opcode suffix for set/cmov insn.
7139 c -- like C, but print reversed condition
7140 F,f -- likewise, but for floating-point.
7141 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
7143 R -- print the prefix for register names.
7144 z -- print the opcode suffix for the size of the current operand.
7145 * -- print a star (in certain assembler syntax)
7146 A -- print an absolute memory reference.
7147 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7148 s -- print a shift double count, followed by the assemblers argument
7150 b -- print the QImode name of the register for the indicated operand.
7151 %b0 would print %al if operands[0] is reg 0.
7152 w -- likewise, print the HImode name of the register.
7153 k -- likewise, print the SImode name of the register.
7154 q -- likewise, print the DImode name of the register.
7155 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7156 y -- print "st(0)" instead of "st" as a register.
7157 D -- print condition for SSE cmp instruction.
7158 P -- if PIC, print an @PLT suffix.
7159 X -- don't print any sort of PIC '@' suffix for a symbol.
7160 & -- print some in-use local-dynamic symbol name.
7164 print_operand (file, x, code)
7174 if (ASSEMBLER_DIALECT == ASM_ATT)
7179 assemble_name (file, get_some_local_dynamic_name ());
7183 if (ASSEMBLER_DIALECT == ASM_ATT)
7185 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7187 /* Intel syntax. For absolute addresses, registers should not
7188 be surrounded by braces. */
7189 if (GET_CODE (x) != REG)
7192 PRINT_OPERAND (file, x, 0);
7200 PRINT_OPERAND (file, x, 0);
7205 if (ASSEMBLER_DIALECT == ASM_ATT)
7210 if (ASSEMBLER_DIALECT == ASM_ATT)
7215 if (ASSEMBLER_DIALECT == ASM_ATT)
7220 if (ASSEMBLER_DIALECT == ASM_ATT)
7225 if (ASSEMBLER_DIALECT == ASM_ATT)
7230 if (ASSEMBLER_DIALECT == ASM_ATT)
7235 /* 387 opcodes don't get size suffixes if the operands are
7237 if (STACK_REG_P (x))
7240 /* Likewise if using Intel opcodes. */
7241 if (ASSEMBLER_DIALECT == ASM_INTEL)
7244 /* This is the size of op from size of operand. */
7245 switch (GET_MODE_SIZE (GET_MODE (x)))
7248 #ifdef HAVE_GAS_FILDS_FISTS
7254 if (GET_MODE (x) == SFmode)
7269 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7271 #ifdef GAS_MNEMONICS
7297 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7299 PRINT_OPERAND (file, x, 0);
7305 /* Little bit of braindamage here. The SSE compare instructions
7306 does use completely different names for the comparisons that the
7307 fp conditional moves. */
7308 switch (GET_CODE (x))
7323 fputs ("unord", file);
7327 fputs ("neq", file);
7331 fputs ("nlt", file);
7335 fputs ("nle", file);
7338 fputs ("ord", file);
7346 #ifdef CMOV_SUN_AS_SYNTAX
7347 if (ASSEMBLER_DIALECT == ASM_ATT)
7349 switch (GET_MODE (x))
7351 case HImode: putc ('w', file); break;
7353 case SFmode: putc ('l', file); break;
7355 case DFmode: putc ('q', file); break;
7363 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7366 #ifdef CMOV_SUN_AS_SYNTAX
7367 if (ASSEMBLER_DIALECT == ASM_ATT)
7370 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7373 /* Like above, but reverse condition */
7375 /* Check to see if argument to %c is really a constant
7376 and not a condition code which needs to be reversed. */
7377 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7379 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7382 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7385 #ifdef CMOV_SUN_AS_SYNTAX
7386 if (ASSEMBLER_DIALECT == ASM_ATT)
7389 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7395 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7398 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7401 int pred_val = INTVAL (XEXP (x, 0));
7403 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7404 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7406 int taken = pred_val > REG_BR_PROB_BASE / 2;
7407 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7409 /* Emit hints only in the case default branch prediction
7410 heuristics would fail. */
7411 if (taken != cputaken)
7413 /* We use 3e (DS) prefix for taken branches and
7414 2e (CS) prefix for not taken branches. */
7416 fputs ("ds ; ", file);
7418 fputs ("cs ; ", file);
7425 output_operand_lossage ("invalid operand code `%c'", code);
7429 if (GET_CODE (x) == REG)
7431 PRINT_REG (x, code, file);
7434 else if (GET_CODE (x) == MEM)
7436 /* No `byte ptr' prefix for call instructions. */
7437 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7440 switch (GET_MODE_SIZE (GET_MODE (x)))
7442 case 1: size = "BYTE"; break;
7443 case 2: size = "WORD"; break;
7444 case 4: size = "DWORD"; break;
7445 case 8: size = "QWORD"; break;
7446 case 12: size = "XWORD"; break;
7447 case 16: size = "XMMWORD"; break;
7452 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7455 else if (code == 'w')
7457 else if (code == 'k')
7461 fputs (" PTR ", file);
7465 /* Avoid (%rip) for call operands. */
7466 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7467 && GET_CODE (x) != CONST_INT)
7468 output_addr_const (file, x);
7469 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7470 output_operand_lossage ("invalid constraints for operand");
7475 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7481 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7483 if (ASSEMBLER_DIALECT == ASM_ATT)
7485 fprintf (file, "0x%lx", l);
7488 /* These float cases don't actually occur as immediate operands. */
7489 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7493 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7494 fprintf (file, "%s", dstr);
7497 else if (GET_CODE (x) == CONST_DOUBLE
7498 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7502 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7503 fprintf (file, "%s", dstr);
7510 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7512 if (ASSEMBLER_DIALECT == ASM_ATT)
7515 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7516 || GET_CODE (x) == LABEL_REF)
7518 if (ASSEMBLER_DIALECT == ASM_ATT)
7521 fputs ("OFFSET FLAT:", file);
7524 if (GET_CODE (x) == CONST_INT)
7525 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7527 output_pic_addr_const (file, x, code);
7529 output_addr_const (file, x);
7533 /* Print a memory operand whose address is ADDR. */
7536 print_operand_address (file, addr)
7540 struct ix86_address parts;
7541 rtx base, index, disp;
7544 if (! ix86_decompose_address (addr, &parts))
7548 index = parts.index;
7550 scale = parts.scale;
7558 if (USER_LABEL_PREFIX[0] == 0)
7560 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7566 if (!base && !index)
7568 /* Displacement only requires special attention. */
7570 if (GET_CODE (disp) == CONST_INT)
7572 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7574 if (USER_LABEL_PREFIX[0] == 0)
7576 fputs ("ds:", file);
7578 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7581 output_pic_addr_const (file, disp, 0);
7583 output_addr_const (file, disp);
7585 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7587 && ((GET_CODE (disp) == SYMBOL_REF
7588 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7589 || GET_CODE (disp) == LABEL_REF
7590 || (GET_CODE (disp) == CONST
7591 && GET_CODE (XEXP (disp, 0)) == PLUS
7592 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7593 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7594 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7595 fputs ("(%rip)", file);
7599 if (ASSEMBLER_DIALECT == ASM_ATT)
7604 output_pic_addr_const (file, disp, 0);
7605 else if (GET_CODE (disp) == LABEL_REF)
7606 output_asm_label (disp);
7608 output_addr_const (file, disp);
7613 PRINT_REG (base, 0, file);
7617 PRINT_REG (index, 0, file);
7619 fprintf (file, ",%d", scale);
7625 rtx offset = NULL_RTX;
7629 /* Pull out the offset of a symbol; print any symbol itself. */
7630 if (GET_CODE (disp) == CONST
7631 && GET_CODE (XEXP (disp, 0)) == PLUS
7632 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7634 offset = XEXP (XEXP (disp, 0), 1);
7635 disp = gen_rtx_CONST (VOIDmode,
7636 XEXP (XEXP (disp, 0), 0));
7640 output_pic_addr_const (file, disp, 0);
7641 else if (GET_CODE (disp) == LABEL_REF)
7642 output_asm_label (disp);
7643 else if (GET_CODE (disp) == CONST_INT)
7646 output_addr_const (file, disp);
7652 PRINT_REG (base, 0, file);
7655 if (INTVAL (offset) >= 0)
7657 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7661 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7668 PRINT_REG (index, 0, file);
7670 fprintf (file, "*%d", scale);
7678 output_addr_const_extra (file, x)
7684 if (GET_CODE (x) != UNSPEC)
7687 op = XVECEXP (x, 0, 0);
7688 switch (XINT (x, 1))
7690 case UNSPEC_GOTTPOFF:
7691 output_addr_const (file, op);
7692 /* FIXME: This might be @TPOFF in Sun ld. */
7693 fputs ("@GOTTPOFF", file);
7696 output_addr_const (file, op);
7697 fputs ("@TPOFF", file);
7700 output_addr_const (file, op);
7702 fputs ("@TPOFF", file);
7704 fputs ("@NTPOFF", file);
7707 output_addr_const (file, op);
7708 fputs ("@DTPOFF", file);
7710 case UNSPEC_GOTNTPOFF:
7711 output_addr_const (file, op);
7713 fputs ("@GOTTPOFF(%rip)", file);
7715 fputs ("@GOTNTPOFF", file);
7717 case UNSPEC_INDNTPOFF:
7718 output_addr_const (file, op);
7719 fputs ("@INDNTPOFF", file);
7729 /* Split one or more DImode RTL references into pairs of SImode
7730 references. The RTL can be REG, offsettable MEM, integer constant, or
7731 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7732 split and "num" is its length. lo_half and hi_half are output arrays
7733 that parallel "operands". */
7736 split_di (operands, num, lo_half, hi_half)
7739 rtx lo_half[], hi_half[];
7743 rtx op = operands[num];
7745 /* simplify_subreg refuse to split volatile memory addresses,
7746 but we still have to handle it. */
7747 if (GET_CODE (op) == MEM)
7749 lo_half[num] = adjust_address (op, SImode, 0);
7750 hi_half[num] = adjust_address (op, SImode, 4);
7754 lo_half[num] = simplify_gen_subreg (SImode, op,
7755 GET_MODE (op) == VOIDmode
7756 ? DImode : GET_MODE (op), 0);
7757 hi_half[num] = simplify_gen_subreg (SImode, op,
7758 GET_MODE (op) == VOIDmode
7759 ? DImode : GET_MODE (op), 4);
7763 /* Split one or more TImode RTL references into pairs of SImode
7764 references. The RTL can be REG, offsettable MEM, integer constant, or
7765 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7766 split and "num" is its length. lo_half and hi_half are output arrays
7767 that parallel "operands". */
7770 split_ti (operands, num, lo_half, hi_half)
7773 rtx lo_half[], hi_half[];
7777 rtx op = operands[num];
7779 /* simplify_subreg refuse to split volatile memory addresses, but we
7780 still have to handle it. */
7781 if (GET_CODE (op) == MEM)
7783 lo_half[num] = adjust_address (op, DImode, 0);
7784 hi_half[num] = adjust_address (op, DImode, 8);
7788 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7789 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7794 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7795 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7796 is the expression of the binary operation. The output may either be
7797 emitted here, or returned to the caller, like all output_* functions.
7799 There is no guarantee that the operands are the same mode, as they
7800 might be within FLOAT or FLOAT_EXTEND expressions. */
7802 #ifndef SYSV386_COMPAT
7803 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7804 wants to fix the assemblers because that causes incompatibility
7805 with gcc. No-one wants to fix gcc because that causes
7806 incompatibility with assemblers... You can use the option of
7807 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7808 #define SYSV386_COMPAT 1
7812 output_387_binary_op (insn, operands)
7816 static char buf[30];
7819 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7821 #ifdef ENABLE_CHECKING
7822 /* Even if we do not want to check the inputs, this documents input
7823 constraints. Which helps in understanding the following code. */
7824 if (STACK_REG_P (operands[0])
7825 && ((REG_P (operands[1])
7826 && REGNO (operands[0]) == REGNO (operands[1])
7827 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7828 || (REG_P (operands[2])
7829 && REGNO (operands[0]) == REGNO (operands[2])
7830 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7831 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7837 switch (GET_CODE (operands[3]))
7840 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7841 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7849 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7850 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7858 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7859 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7867 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7868 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7882 if (GET_MODE (operands[0]) == SFmode)
7883 strcat (buf, "ss\t{%2, %0|%0, %2}");
7885 strcat (buf, "sd\t{%2, %0|%0, %2}");
7890 switch (GET_CODE (operands[3]))
7894 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7896 rtx temp = operands[2];
7897 operands[2] = operands[1];
7901 /* know operands[0] == operands[1]. */
7903 if (GET_CODE (operands[2]) == MEM)
7909 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7911 if (STACK_TOP_P (operands[0]))
7912 /* How is it that we are storing to a dead operand[2]?
7913 Well, presumably operands[1] is dead too. We can't
7914 store the result to st(0) as st(0) gets popped on this
7915 instruction. Instead store to operands[2] (which I
7916 think has to be st(1)). st(1) will be popped later.
7917 gcc <= 2.8.1 didn't have this check and generated
7918 assembly code that the Unixware assembler rejected. */
7919 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7921 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7925 if (STACK_TOP_P (operands[0]))
7926 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7928 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7933 if (GET_CODE (operands[1]) == MEM)
7939 if (GET_CODE (operands[2]) == MEM)
7945 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7948 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7949 derived assemblers, confusingly reverse the direction of
7950 the operation for fsub{r} and fdiv{r} when the
7951 destination register is not st(0). The Intel assembler
7952 doesn't have this brain damage. Read !SYSV386_COMPAT to
7953 figure out what the hardware really does. */
7954 if (STACK_TOP_P (operands[0]))
7955 p = "{p\t%0, %2|rp\t%2, %0}";
7957 p = "{rp\t%2, %0|p\t%0, %2}";
7959 if (STACK_TOP_P (operands[0]))
7960 /* As above for fmul/fadd, we can't store to st(0). */
7961 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7963 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7968 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7971 if (STACK_TOP_P (operands[0]))
7972 p = "{rp\t%0, %1|p\t%1, %0}";
7974 p = "{p\t%1, %0|rp\t%0, %1}";
7976 if (STACK_TOP_P (operands[0]))
7977 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7979 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7984 if (STACK_TOP_P (operands[0]))
7986 if (STACK_TOP_P (operands[1]))
7987 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7989 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7992 else if (STACK_TOP_P (operands[1]))
7995 p = "{\t%1, %0|r\t%0, %1}";
7997 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8003 p = "{r\t%2, %0|\t%0, %2}";
8005 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8018 /* Output code to initialize control word copies used by
8019 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8020 is set to control word rounding downwards. */
8022 emit_i387_cw_initialization (normal, round_down)
8023 rtx normal, round_down;
8025 rtx reg = gen_reg_rtx (HImode);
8027 emit_insn (gen_x86_fnstcw_1 (normal));
8028 emit_move_insn (reg, normal);
8029 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8031 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8033 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8034 emit_move_insn (round_down, reg);
8037 /* Output code for INSN to convert a float to a signed int. OPERANDS
8038 are the insn operands. The output may be [HSD]Imode and the input
8039 operand may be [SDX]Fmode. */
8042 output_fix_trunc (insn, operands)
8046 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8047 int dimode_p = GET_MODE (operands[0]) == DImode;
8049 /* Jump through a hoop or two for DImode, since the hardware has no
8050 non-popping instruction. We used to do this a different way, but
8051 that was somewhat fragile and broke with post-reload splitters. */
8052 if (dimode_p && !stack_top_dies)
8053 output_asm_insn ("fld\t%y1", operands);
8055 if (!STACK_TOP_P (operands[1]))
8058 if (GET_CODE (operands[0]) != MEM)
8061 output_asm_insn ("fldcw\t%3", operands);
8062 if (stack_top_dies || dimode_p)
8063 output_asm_insn ("fistp%z0\t%0", operands);
8065 output_asm_insn ("fist%z0\t%0", operands);
8066 output_asm_insn ("fldcw\t%2", operands);
8071 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8072 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8073 when fucom should be used. */
8076 output_fp_compare (insn, operands, eflags_p, unordered_p)
8079 int eflags_p, unordered_p;
8082 rtx cmp_op0 = operands[0];
8083 rtx cmp_op1 = operands[1];
8084 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8089 cmp_op1 = operands[2];
8093 if (GET_MODE (operands[0]) == SFmode)
8095 return "ucomiss\t{%1, %0|%0, %1}";
8097 return "comiss\t{%1, %0|%0, %1}";
8100 return "ucomisd\t{%1, %0|%0, %1}";
8102 return "comisd\t{%1, %0|%0, %1}";
8105 if (! STACK_TOP_P (cmp_op0))
8108 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8110 if (STACK_REG_P (cmp_op1)
8112 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8113 && REGNO (cmp_op1) != FIRST_STACK_REG)
8115 /* If both the top of the 387 stack dies, and the other operand
8116 is also a stack register that dies, then this must be a
8117 `fcompp' float compare */
8121 /* There is no double popping fcomi variant. Fortunately,
8122 eflags is immune from the fstp's cc clobbering. */
8124 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8126 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8134 return "fucompp\n\tfnstsw\t%0";
8136 return "fcompp\n\tfnstsw\t%0";
8149 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8151 static const char * const alt[24] =
8163 "fcomi\t{%y1, %0|%0, %y1}",
8164 "fcomip\t{%y1, %0|%0, %y1}",
8165 "fucomi\t{%y1, %0|%0, %y1}",
8166 "fucomip\t{%y1, %0|%0, %y1}",
8173 "fcom%z2\t%y2\n\tfnstsw\t%0",
8174 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8175 "fucom%z2\t%y2\n\tfnstsw\t%0",
8176 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8178 "ficom%z2\t%y2\n\tfnstsw\t%0",
8179 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8187 mask = eflags_p << 3;
8188 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8189 mask |= unordered_p << 1;
8190 mask |= stack_top_dies;
8203 ix86_output_addr_vec_elt (file, value)
8207 const char *directive = ASM_LONG;
8212 directive = ASM_QUAD;
8218 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8222 ix86_output_addr_diff_elt (file, value, rel)
8227 fprintf (file, "%s%s%d-%s%d\n",
8228 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8229 else if (HAVE_AS_GOTOFF_IN_DATA)
8230 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8232 else if (TARGET_MACHO)
8233 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8234 machopic_function_base_name () + 1);
8237 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8238 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8241 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8245 ix86_expand_clear (dest)
8250 /* We play register width games, which are only valid after reload. */
8251 if (!reload_completed)
8254 /* Avoid HImode and its attendant prefix byte. */
8255 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8256 dest = gen_rtx_REG (SImode, REGNO (dest));
8258 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8260 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8261 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8263 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8264 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8270 /* X is an unchanging MEM. If it is a constant pool reference, return
8271 the constant pool rtx, else NULL. */
8274 maybe_get_pool_constant (x)
8277 x = ix86_delegitimize_address (XEXP (x, 0));
8279 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8280 return get_pool_constant (x);
8286 ix86_expand_move (mode, operands)
8287 enum machine_mode mode;
8290 int strict = (reload_in_progress || reload_completed);
8292 enum tls_model model;
8297 model = tls_symbolic_operand (op1, Pmode);
8300 op1 = legitimize_tls_address (op1, model, true);
8301 op1 = force_operand (op1, op0);
8306 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8311 rtx temp = ((reload_in_progress
8312 || ((op0 && GET_CODE (op0) == REG)
8314 ? op0 : gen_reg_rtx (Pmode));
8315 op1 = machopic_indirect_data_reference (op1, temp);
8316 op1 = machopic_legitimize_pic_address (op1, mode,
8317 temp == op1 ? 0 : temp);
8319 else if (MACHOPIC_INDIRECT)
8320 op1 = machopic_indirect_data_reference (op1, 0);
8324 if (GET_CODE (op0) == MEM)
8325 op1 = force_reg (Pmode, op1);
8329 if (GET_CODE (temp) != REG)
8330 temp = gen_reg_rtx (Pmode);
8331 temp = legitimize_pic_address (op1, temp);
8336 #endif /* TARGET_MACHO */
8340 if (GET_CODE (op0) == MEM
8341 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8342 || !push_operand (op0, mode))
8343 && GET_CODE (op1) == MEM)
8344 op1 = force_reg (mode, op1);
8346 if (push_operand (op0, mode)
8347 && ! general_no_elim_operand (op1, mode))
8348 op1 = copy_to_mode_reg (mode, op1);
8350 /* Force large constants in 64bit compilation into register
8351 to get them CSEed. */
8352 if (TARGET_64BIT && mode == DImode
8353 && immediate_operand (op1, mode)
8354 && !x86_64_zero_extended_value (op1)
8355 && !register_operand (op0, mode)
8356 && optimize && !reload_completed && !reload_in_progress)
8357 op1 = copy_to_mode_reg (mode, op1);
8359 if (FLOAT_MODE_P (mode))
8361 /* If we are loading a floating point constant to a register,
8362 force the value to memory now, since we'll get better code
8363 out the back end. */
8367 else if (GET_CODE (op1) == CONST_DOUBLE)
8369 op1 = validize_mem (force_const_mem (mode, op1));
8370 if (!register_operand (op0, mode))
8372 rtx temp = gen_reg_rtx (mode);
8373 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8374 emit_move_insn (op0, temp);
8381 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8385 ix86_expand_vector_move (mode, operands)
8386 enum machine_mode mode;
8389 /* Force constants other than zero into memory. We do not know how
8390 the instructions used to build constants modify the upper 64 bits
8391 of the register, once we have that information we may be able
8392 to handle some of them more efficiently. */
8393 if ((reload_in_progress | reload_completed) == 0
8394 && register_operand (operands[0], mode)
8395 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8396 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8398 /* Make operand1 a register if it isn't already. */
8400 && !register_operand (operands[0], mode)
8401 && !register_operand (operands[1], mode))
8403 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8404 emit_move_insn (operands[0], temp);
8408 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8411 /* Attempt to expand a binary operator. Make the expansion closer to the
8412 actual machine, then just general_operand, which will allow 3 separate
8413 memory references (one output, two input) in a single insn. */
8416 ix86_expand_binary_operator (code, mode, operands)
8418 enum machine_mode mode;
8421 int matching_memory;
8422 rtx src1, src2, dst, op, clob;
8428 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8429 if (GET_RTX_CLASS (code) == 'c'
8430 && (rtx_equal_p (dst, src2)
8431 || immediate_operand (src1, mode)))
8438 /* If the destination is memory, and we do not have matching source
8439 operands, do things in registers. */
8440 matching_memory = 0;
8441 if (GET_CODE (dst) == MEM)
8443 if (rtx_equal_p (dst, src1))
8444 matching_memory = 1;
8445 else if (GET_RTX_CLASS (code) == 'c'
8446 && rtx_equal_p (dst, src2))
8447 matching_memory = 2;
8449 dst = gen_reg_rtx (mode);
8452 /* Both source operands cannot be in memory. */
8453 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8455 if (matching_memory != 2)
8456 src2 = force_reg (mode, src2);
8458 src1 = force_reg (mode, src1);
8461 /* If the operation is not commutable, source 1 cannot be a constant
8462 or non-matching memory. */
8463 if ((CONSTANT_P (src1)
8464 || (!matching_memory && GET_CODE (src1) == MEM))
8465 && GET_RTX_CLASS (code) != 'c')
8466 src1 = force_reg (mode, src1);
8468 /* If optimizing, copy to regs to improve CSE */
8469 if (optimize && ! no_new_pseudos)
8471 if (GET_CODE (dst) == MEM)
8472 dst = gen_reg_rtx (mode);
8473 if (GET_CODE (src1) == MEM)
8474 src1 = force_reg (mode, src1);
8475 if (GET_CODE (src2) == MEM)
8476 src2 = force_reg (mode, src2);
8479 /* Emit the instruction. */
8481 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8482 if (reload_in_progress)
8484 /* Reload doesn't know about the flags register, and doesn't know that
8485 it doesn't want to clobber it. We can only do this with PLUS. */
8492 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8493 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8496 /* Fix up the destination if needed. */
8497 if (dst != operands[0])
8498 emit_move_insn (operands[0], dst);
8501 /* Return TRUE or FALSE depending on whether the binary operator meets the
8502 appropriate constraints. */
8505 ix86_binary_operator_ok (code, mode, operands)
8507 enum machine_mode mode ATTRIBUTE_UNUSED;
8510 /* Both source operands cannot be in memory. */
8511 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8513 /* If the operation is not commutable, source 1 cannot be a constant. */
8514 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8516 /* If the destination is memory, we must have a matching source operand. */
8517 if (GET_CODE (operands[0]) == MEM
8518 && ! (rtx_equal_p (operands[0], operands[1])
8519 || (GET_RTX_CLASS (code) == 'c'
8520 && rtx_equal_p (operands[0], operands[2]))))
8522 /* If the operation is not commutable and the source 1 is memory, we must
8523 have a matching destination. */
8524 if (GET_CODE (operands[1]) == MEM
8525 && GET_RTX_CLASS (code) != 'c'
8526 && ! rtx_equal_p (operands[0], operands[1]))
8531 /* Attempt to expand a unary operator. Make the expansion closer to the
8532 actual machine, then just general_operand, which will allow 2 separate
8533 memory references (one output, one input) in a single insn. */
8536 ix86_expand_unary_operator (code, mode, operands)
8538 enum machine_mode mode;
8541 int matching_memory;
8542 rtx src, dst, op, clob;
8547 /* If the destination is memory, and we do not have matching source
8548 operands, do things in registers. */
8549 matching_memory = 0;
8550 if (GET_CODE (dst) == MEM)
8552 if (rtx_equal_p (dst, src))
8553 matching_memory = 1;
8555 dst = gen_reg_rtx (mode);
8558 /* When source operand is memory, destination must match. */
8559 if (!matching_memory && GET_CODE (src) == MEM)
8560 src = force_reg (mode, src);
8562 /* If optimizing, copy to regs to improve CSE */
8563 if (optimize && ! no_new_pseudos)
8565 if (GET_CODE (dst) == MEM)
8566 dst = gen_reg_rtx (mode);
8567 if (GET_CODE (src) == MEM)
8568 src = force_reg (mode, src);
8571 /* Emit the instruction. */
8573 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8574 if (reload_in_progress || code == NOT)
8576 /* Reload doesn't know about the flags register, and doesn't know that
8577 it doesn't want to clobber it. */
8584 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8585 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8588 /* Fix up the destination if needed. */
8589 if (dst != operands[0])
8590 emit_move_insn (operands[0], dst);
8593 /* Return TRUE or FALSE depending on whether the unary operator meets the
8594 appropriate constraints. */
8597 ix86_unary_operator_ok (code, mode, operands)
8598 enum rtx_code code ATTRIBUTE_UNUSED;
8599 enum machine_mode mode ATTRIBUTE_UNUSED;
8600 rtx operands[2] ATTRIBUTE_UNUSED;
8602 /* If one of operands is memory, source and destination must match. */
8603 if ((GET_CODE (operands[0]) == MEM
8604 || GET_CODE (operands[1]) == MEM)
8605 && ! rtx_equal_p (operands[0], operands[1]))
8610 /* Return TRUE or FALSE depending on whether the first SET in INSN
8611 has source and destination with matching CC modes, and that the
8612 CC mode is at least as constrained as REQ_MODE. */
8615 ix86_match_ccmode (insn, req_mode)
8617 enum machine_mode req_mode;
8620 enum machine_mode set_mode;
8622 set = PATTERN (insn);
8623 if (GET_CODE (set) == PARALLEL)
8624 set = XVECEXP (set, 0, 0);
8625 if (GET_CODE (set) != SET)
8627 if (GET_CODE (SET_SRC (set)) != COMPARE)
8630 set_mode = GET_MODE (SET_DEST (set));
8634 if (req_mode != CCNOmode
8635 && (req_mode != CCmode
8636 || XEXP (SET_SRC (set), 1) != const0_rtx))
8640 if (req_mode == CCGCmode)
8644 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8648 if (req_mode == CCZmode)
8658 return (GET_MODE (SET_SRC (set)) == set_mode);
8661 /* Generate insn patterns to do an integer compare of OPERANDS. */
8664 ix86_expand_int_compare (code, op0, op1)
8668 enum machine_mode cmpmode;
8671 cmpmode = SELECT_CC_MODE (code, op0, op1);
8672 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8674 /* This is very simple, but making the interface the same as in the
8675 FP case makes the rest of the code easier. */
8676 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8677 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8679 /* Return the test that should be put into the flags user, i.e.
8680 the bcc, scc, or cmov instruction. */
8681 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8684 /* Figure out whether to use ordered or unordered fp comparisons.
8685 Return the appropriate mode to use. */
8688 ix86_fp_compare_mode (code)
8689 enum rtx_code code ATTRIBUTE_UNUSED;
8691 /* ??? In order to make all comparisons reversible, we do all comparisons
8692 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8693 all forms trapping and nontrapping comparisons, we can make inequality
8694 comparisons trapping again, since it results in better code when using
8695 FCOM based compares. */
8696 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8700 ix86_cc_mode (code, op0, op1)
8704 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8705 return ix86_fp_compare_mode (code);
8708 /* Only zero flag is needed. */
8710 case NE: /* ZF!=0 */
8712 /* Codes needing carry flag. */
8713 case GEU: /* CF=0 */
8714 case GTU: /* CF=0 & ZF=0 */
8715 case LTU: /* CF=1 */
8716 case LEU: /* CF=1 | ZF=1 */
8718 /* Codes possibly doable only with sign flag when
8719 comparing against zero. */
8720 case GE: /* SF=OF or SF=0 */
8721 case LT: /* SF<>OF or SF=1 */
8722 if (op1 == const0_rtx)
8725 /* For other cases Carry flag is not required. */
8727 /* Codes doable only with sign flag when comparing
8728 against zero, but we miss jump instruction for it
8729 so we need to use relational tests against overflow
8730 that thus needs to be zero. */
8731 case GT: /* ZF=0 & SF=OF */
8732 case LE: /* ZF=1 | SF<>OF */
8733 if (op1 == const0_rtx)
8737 /* strcmp pattern do (use flags) and combine may ask us for proper
8746 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8749 ix86_use_fcomi_compare (code)
8750 enum rtx_code code ATTRIBUTE_UNUSED;
8752 enum rtx_code swapped_code = swap_condition (code);
8753 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8754 || (ix86_fp_comparison_cost (swapped_code)
8755 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8758 /* Swap, force into registers, or otherwise massage the two operands
8759 to a fp comparison. The operands are updated in place; the new
8760 comparison code is returned. */
8762 static enum rtx_code
8763 ix86_prepare_fp_compare_args (code, pop0, pop1)
8767 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8768 rtx op0 = *pop0, op1 = *pop1;
8769 enum machine_mode op_mode = GET_MODE (op0);
8770 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8772 /* All of the unordered compare instructions only work on registers.
8773 The same is true of the XFmode compare instructions. The same is
8774 true of the fcomi compare instructions. */
8777 && (fpcmp_mode == CCFPUmode
8778 || op_mode == XFmode
8779 || op_mode == TFmode
8780 || ix86_use_fcomi_compare (code)))
8782 op0 = force_reg (op_mode, op0);
8783 op1 = force_reg (op_mode, op1);
8787 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8788 things around if they appear profitable, otherwise force op0
8791 if (standard_80387_constant_p (op0) == 0
8792 || (GET_CODE (op0) == MEM
8793 && ! (standard_80387_constant_p (op1) == 0
8794 || GET_CODE (op1) == MEM)))
8797 tmp = op0, op0 = op1, op1 = tmp;
8798 code = swap_condition (code);
8801 if (GET_CODE (op0) != REG)
8802 op0 = force_reg (op_mode, op0);
8804 if (CONSTANT_P (op1))
8806 if (standard_80387_constant_p (op1))
8807 op1 = force_reg (op_mode, op1);
8809 op1 = validize_mem (force_const_mem (op_mode, op1));
8813 /* Try to rearrange the comparison to make it cheaper. */
8814 if (ix86_fp_comparison_cost (code)
8815 > ix86_fp_comparison_cost (swap_condition (code))
8816 && (GET_CODE (op1) == REG || !no_new_pseudos))
8819 tmp = op0, op0 = op1, op1 = tmp;
8820 code = swap_condition (code);
8821 if (GET_CODE (op0) != REG)
8822 op0 = force_reg (op_mode, op0);
8830 /* Convert comparison codes we use to represent FP comparison to integer
8831 code that will result in proper branch. Return UNKNOWN if no such code
8833 static enum rtx_code
8834 ix86_fp_compare_code_to_integer (code)
8864 /* Split comparison code CODE into comparisons we can do using branch
8865 instructions. BYPASS_CODE is comparison code for branch that will
8866 branch around FIRST_CODE and SECOND_CODE. If some of branches
8867 is not required, set value to NIL.
8868 We never require more than two branches. */
8870 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8871 enum rtx_code code, *bypass_code, *first_code, *second_code;
8877 /* The fcomi comparison sets flags as follows:
8887 case GT: /* GTU - CF=0 & ZF=0 */
8888 case GE: /* GEU - CF=0 */
8889 case ORDERED: /* PF=0 */
8890 case UNORDERED: /* PF=1 */
8891 case UNEQ: /* EQ - ZF=1 */
8892 case UNLT: /* LTU - CF=1 */
8893 case UNLE: /* LEU - CF=1 | ZF=1 */
8894 case LTGT: /* EQ - ZF=0 */
8896 case LT: /* LTU - CF=1 - fails on unordered */
8898 *bypass_code = UNORDERED;
8900 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8902 *bypass_code = UNORDERED;
8904 case EQ: /* EQ - ZF=1 - fails on unordered */
8906 *bypass_code = UNORDERED;
8908 case NE: /* NE - ZF=0 - fails on unordered */
8910 *second_code = UNORDERED;
8912 case UNGE: /* GEU - CF=0 - fails on unordered */
8914 *second_code = UNORDERED;
8916 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8918 *second_code = UNORDERED;
8923 if (!TARGET_IEEE_FP)
8930 /* Return cost of comparison done fcom + arithmetics operations on AX.
8931 All following functions do use number of instructions as a cost metrics.
8932 In future this should be tweaked to compute bytes for optimize_size and
8933 take into account performance of various instructions on various CPUs. */
8935 ix86_fp_comparison_arithmetics_cost (code)
8938 if (!TARGET_IEEE_FP)
8940 /* The cost of code output by ix86_expand_fp_compare. */
8968 /* Return cost of comparison done using fcomi operation.
8969 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8971 ix86_fp_comparison_fcomi_cost (code)
8974 enum rtx_code bypass_code, first_code, second_code;
8975 /* Return arbitrarily high cost when instruction is not supported - this
8976 prevents gcc from using it. */
8979 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8980 return (bypass_code != NIL || second_code != NIL) + 2;
8983 /* Return cost of comparison done using sahf operation.
8984 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8986 ix86_fp_comparison_sahf_cost (code)
8989 enum rtx_code bypass_code, first_code, second_code;
8990 /* Return arbitrarily high cost when instruction is not preferred - this
8991 avoids gcc from using it. */
8992 if (!TARGET_USE_SAHF && !optimize_size)
8994 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8995 return (bypass_code != NIL || second_code != NIL) + 3;
8998 /* Compute cost of the comparison done using any method.
8999 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9001 ix86_fp_comparison_cost (code)
9004 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9007 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9008 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9010 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9011 if (min > sahf_cost)
9013 if (min > fcomi_cost)
9018 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9021 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
9023 rtx op0, op1, scratch;
9027 enum machine_mode fpcmp_mode, intcmp_mode;
9029 int cost = ix86_fp_comparison_cost (code);
9030 enum rtx_code bypass_code, first_code, second_code;
9032 fpcmp_mode = ix86_fp_compare_mode (code);
9033 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9036 *second_test = NULL_RTX;
9038 *bypass_test = NULL_RTX;
9040 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9042 /* Do fcomi/sahf based test when profitable. */
9043 if ((bypass_code == NIL || bypass_test)
9044 && (second_code == NIL || second_test)
9045 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9049 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9050 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9056 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9057 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9059 scratch = gen_reg_rtx (HImode);
9060 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9061 emit_insn (gen_x86_sahf_1 (scratch));
9064 /* The FP codes work out to act like unsigned. */
9065 intcmp_mode = fpcmp_mode;
9067 if (bypass_code != NIL)
9068 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9069 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9071 if (second_code != NIL)
9072 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9073 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9078 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9079 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9080 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9082 scratch = gen_reg_rtx (HImode);
9083 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9085 /* In the unordered case, we have to check C2 for NaN's, which
9086 doesn't happen to work out to anything nice combination-wise.
9087 So do some bit twiddling on the value we've got in AH to come
9088 up with an appropriate set of condition codes. */
9090 intcmp_mode = CCNOmode;
9095 if (code == GT || !TARGET_IEEE_FP)
9097 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9102 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9103 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9104 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9105 intcmp_mode = CCmode;
9111 if (code == LT && TARGET_IEEE_FP)
9113 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9114 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9115 intcmp_mode = CCmode;
9120 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9126 if (code == GE || !TARGET_IEEE_FP)
9128 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9133 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9134 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9141 if (code == LE && TARGET_IEEE_FP)
9143 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9144 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9145 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9146 intcmp_mode = CCmode;
9151 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9157 if (code == EQ && TARGET_IEEE_FP)
9159 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9160 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9161 intcmp_mode = CCmode;
9166 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9173 if (code == NE && TARGET_IEEE_FP)
9175 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9176 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9182 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9188 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9192 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9201 /* Return the test that should be put into the flags user, i.e.
9202 the bcc, scc, or cmov instruction. */
9203 return gen_rtx_fmt_ee (code, VOIDmode,
9204 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9209 ix86_expand_compare (code, second_test, bypass_test)
9211 rtx *second_test, *bypass_test;
9214 op0 = ix86_compare_op0;
9215 op1 = ix86_compare_op1;
9218 *second_test = NULL_RTX;
9220 *bypass_test = NULL_RTX;
9222 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9223 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9224 second_test, bypass_test);
9226 ret = ix86_expand_int_compare (code, op0, op1);
9231 /* Return true if the CODE will result in nontrivial jump sequence. */
9233 ix86_fp_jump_nontrivial_p (code)
9236 enum rtx_code bypass_code, first_code, second_code;
9239 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9240 return bypass_code != NIL || second_code != NIL;
9244 ix86_expand_branch (code, label)
9250 switch (GET_MODE (ix86_compare_op0))
9256 tmp = ix86_expand_compare (code, NULL, NULL);
9257 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9258 gen_rtx_LABEL_REF (VOIDmode, label),
9260 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9270 enum rtx_code bypass_code, first_code, second_code;
9272 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9275 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9277 /* Check whether we will use the natural sequence with one jump. If
9278 so, we can expand jump early. Otherwise delay expansion by
9279 creating compound insn to not confuse optimizers. */
9280 if (bypass_code == NIL && second_code == NIL
9283 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9284 gen_rtx_LABEL_REF (VOIDmode, label),
9289 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9290 ix86_compare_op0, ix86_compare_op1);
9291 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9292 gen_rtx_LABEL_REF (VOIDmode, label),
9294 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9296 use_fcomi = ix86_use_fcomi_compare (code);
9297 vec = rtvec_alloc (3 + !use_fcomi);
9298 RTVEC_ELT (vec, 0) = tmp;
9300 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9302 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9305 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9307 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9315 /* Expand DImode branch into multiple compare+branch. */
9317 rtx lo[2], hi[2], label2;
9318 enum rtx_code code1, code2, code3;
9320 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9322 tmp = ix86_compare_op0;
9323 ix86_compare_op0 = ix86_compare_op1;
9324 ix86_compare_op1 = tmp;
9325 code = swap_condition (code);
9327 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9328 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9330 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9331 avoid two branches. This costs one extra insn, so disable when
9332 optimizing for size. */
9334 if ((code == EQ || code == NE)
9336 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9341 if (hi[1] != const0_rtx)
9342 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9343 NULL_RTX, 0, OPTAB_WIDEN);
9346 if (lo[1] != const0_rtx)
9347 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9348 NULL_RTX, 0, OPTAB_WIDEN);
9350 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9351 NULL_RTX, 0, OPTAB_WIDEN);
9353 ix86_compare_op0 = tmp;
9354 ix86_compare_op1 = const0_rtx;
9355 ix86_expand_branch (code, label);
9359 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9360 op1 is a constant and the low word is zero, then we can just
9361 examine the high word. */
9363 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9366 case LT: case LTU: case GE: case GEU:
9367 ix86_compare_op0 = hi[0];
9368 ix86_compare_op1 = hi[1];
9369 ix86_expand_branch (code, label);
9375 /* Otherwise, we need two or three jumps. */
9377 label2 = gen_label_rtx ();
9380 code2 = swap_condition (code);
9381 code3 = unsigned_condition (code);
9385 case LT: case GT: case LTU: case GTU:
9388 case LE: code1 = LT; code2 = GT; break;
9389 case GE: code1 = GT; code2 = LT; break;
9390 case LEU: code1 = LTU; code2 = GTU; break;
9391 case GEU: code1 = GTU; code2 = LTU; break;
9393 case EQ: code1 = NIL; code2 = NE; break;
9394 case NE: code2 = NIL; break;
9402 * if (hi(a) < hi(b)) goto true;
9403 * if (hi(a) > hi(b)) goto false;
9404 * if (lo(a) < lo(b)) goto true;
9408 ix86_compare_op0 = hi[0];
9409 ix86_compare_op1 = hi[1];
9412 ix86_expand_branch (code1, label);
9414 ix86_expand_branch (code2, label2);
9416 ix86_compare_op0 = lo[0];
9417 ix86_compare_op1 = lo[1];
9418 ix86_expand_branch (code3, label);
9421 emit_label (label2);
9430 /* Split branch based on floating point condition. */
9432 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9434 rtx op1, op2, target1, target2, tmp;
9437 rtx label = NULL_RTX;
9439 int bypass_probability = -1, second_probability = -1, probability = -1;
9442 if (target2 != pc_rtx)
9445 code = reverse_condition_maybe_unordered (code);
9450 condition = ix86_expand_fp_compare (code, op1, op2,
9451 tmp, &second, &bypass);
9453 if (split_branch_probability >= 0)
9455 /* Distribute the probabilities across the jumps.
9456 Assume the BYPASS and SECOND to be always test
9458 probability = split_branch_probability;
9460 /* Value of 1 is low enough to make no need for probability
9461 to be updated. Later we may run some experiments and see
9462 if unordered values are more frequent in practice. */
9464 bypass_probability = 1;
9466 second_probability = 1;
9468 if (bypass != NULL_RTX)
9470 label = gen_label_rtx ();
9471 i = emit_jump_insn (gen_rtx_SET
9473 gen_rtx_IF_THEN_ELSE (VOIDmode,
9475 gen_rtx_LABEL_REF (VOIDmode,
9478 if (bypass_probability >= 0)
9480 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9481 GEN_INT (bypass_probability),
9484 i = emit_jump_insn (gen_rtx_SET
9486 gen_rtx_IF_THEN_ELSE (VOIDmode,
9487 condition, target1, target2)));
9488 if (probability >= 0)
9490 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9491 GEN_INT (probability),
9493 if (second != NULL_RTX)
9495 i = emit_jump_insn (gen_rtx_SET
9497 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9499 if (second_probability >= 0)
9501 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9502 GEN_INT (second_probability),
9505 if (label != NULL_RTX)
9510 ix86_expand_setcc (code, dest)
9514 rtx ret, tmp, tmpreg;
9515 rtx second_test, bypass_test;
9517 if (GET_MODE (ix86_compare_op0) == DImode
9519 return 0; /* FAIL */
9521 if (GET_MODE (dest) != QImode)
9524 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9525 PUT_MODE (ret, QImode);
9530 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9531 if (bypass_test || second_test)
9533 rtx test = second_test;
9535 rtx tmp2 = gen_reg_rtx (QImode);
9542 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9544 PUT_MODE (test, QImode);
9545 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9548 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9550 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9553 return 1; /* DONE */
9556 /* Expand comparison setting or clearing carry flag. Return true when successful
9557 and set pop for the operation. */
9559 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9563 enum machine_mode mode =
9564 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9566 /* Do not handle DImode compares that go trought special path. Also we can't
9567 deal with FP compares yet. This is possible to add. */
9568 if ((mode == DImode && !TARGET_64BIT))
9570 if (FLOAT_MODE_P (mode))
9572 rtx second_test = NULL, bypass_test = NULL;
9573 rtx compare_op, compare_seq;
9575 /* Shortcut: following common codes never translate into carry flag compares. */
9576 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9577 || code == ORDERED || code == UNORDERED)
9580 /* These comparisons require zero flag; swap operands so they won't. */
9581 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9587 code = swap_condition (code);
9590 /* Try to expand the comparsion and verify that we end up with carry flag
9591 based comparsion. This is fails to be true only when we decide to expand
9592 comparsion using arithmetic that is not too common scenario. */
9594 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9595 &second_test, &bypass_test);
9596 compare_seq = get_insns ();
9599 if (second_test || bypass_test)
9601 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9602 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9603 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9605 code = GET_CODE (compare_op);
9606 if (code != LTU && code != GEU)
9608 emit_insn (compare_seq);
9612 if (!INTEGRAL_MODE_P (mode))
9620 /* Convert a==0 into (unsigned)a<1. */
9623 if (op1 != const0_rtx)
9626 code = (code == EQ ? LTU : GEU);
9629 /* Convert a>b into b<a or a>=b-1. */
9632 if (GET_CODE (op1) == CONST_INT)
9634 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9635 /* Bail out on overflow. We still can swap operands but that
9636 would force loading of the constant into register. */
9637 if (op1 == const0_rtx
9638 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9640 code = (code == GTU ? GEU : LTU);
9647 code = (code == GTU ? LTU : GEU);
9651 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9654 if (mode == DImode || op1 != const0_rtx)
9656 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9657 code = (code == LT ? GEU : LTU);
9661 if (mode == DImode || op1 != constm1_rtx)
9663 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9664 code = (code == LE ? GEU : LTU);
9670 ix86_compare_op0 = op0;
9671 ix86_compare_op1 = op1;
9672 *pop = ix86_expand_compare (code, NULL, NULL);
9673 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9679 ix86_expand_int_movcc (operands)
9682 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9683 rtx compare_seq, compare_op;
9684 rtx second_test, bypass_test;
9685 enum machine_mode mode = GET_MODE (operands[0]);
9686 bool sign_bit_compare_p = false;;
9689 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9690 compare_seq = get_insns ();
9693 compare_code = GET_CODE (compare_op);
9695 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9696 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9697 sign_bit_compare_p = true;
9699 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9700 HImode insns, we'd be swallowed in word prefix ops. */
9702 if ((mode != HImode || TARGET_FAST_PREFIX)
9703 && (mode != DImode || TARGET_64BIT)
9704 && GET_CODE (operands[2]) == CONST_INT
9705 && GET_CODE (operands[3]) == CONST_INT)
9707 rtx out = operands[0];
9708 HOST_WIDE_INT ct = INTVAL (operands[2]);
9709 HOST_WIDE_INT cf = INTVAL (operands[3]);
9713 /* Sign bit compares are better done using shifts than we do by using
9715 if (sign_bit_compare_p
9716 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9717 ix86_compare_op1, &compare_op))
9719 /* Detect overlap between destination and compare sources. */
9722 if (!sign_bit_compare_p)
9726 compare_code = GET_CODE (compare_op);
9728 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9729 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9732 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9735 /* To simplify rest of code, restrict to the GEU case. */
9736 if (compare_code == LTU)
9738 HOST_WIDE_INT tmp = ct;
9741 compare_code = reverse_condition (compare_code);
9742 code = reverse_condition (code);
9747 PUT_CODE (compare_op,
9748 reverse_condition_maybe_unordered
9749 (GET_CODE (compare_op)));
9751 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9755 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9756 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9757 tmp = gen_reg_rtx (mode);
9760 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9762 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9766 if (code == GT || code == GE)
9767 code = reverse_condition (code);
9770 HOST_WIDE_INT tmp = ct;
9775 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9776 ix86_compare_op1, VOIDmode, 0, -1);
9789 tmp = expand_simple_binop (mode, PLUS,
9791 copy_rtx (tmp), 1, OPTAB_DIRECT);
9802 tmp = expand_simple_binop (mode, IOR,
9804 copy_rtx (tmp), 1, OPTAB_DIRECT);
9806 else if (diff == -1 && ct)
9816 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9818 tmp = expand_simple_binop (mode, PLUS,
9819 copy_rtx (tmp), GEN_INT (cf),
9820 copy_rtx (tmp), 1, OPTAB_DIRECT);
9828 * andl cf - ct, dest
9838 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9841 tmp = expand_simple_binop (mode, AND,
9843 gen_int_mode (cf - ct, mode),
9844 copy_rtx (tmp), 1, OPTAB_DIRECT);
9846 tmp = expand_simple_binop (mode, PLUS,
9847 copy_rtx (tmp), GEN_INT (ct),
9848 copy_rtx (tmp), 1, OPTAB_DIRECT);
9851 if (!rtx_equal_p (tmp, out))
9852 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9854 return 1; /* DONE */
9860 tmp = ct, ct = cf, cf = tmp;
9862 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9864 /* We may be reversing unordered compare to normal compare, that
9865 is not valid in general (we may convert non-trapping condition
9866 to trapping one), however on i386 we currently emit all
9867 comparisons unordered. */
9868 compare_code = reverse_condition_maybe_unordered (compare_code);
9869 code = reverse_condition_maybe_unordered (code);
9873 compare_code = reverse_condition (compare_code);
9874 code = reverse_condition (code);
9879 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9880 && GET_CODE (ix86_compare_op1) == CONST_INT)
9882 if (ix86_compare_op1 == const0_rtx
9883 && (code == LT || code == GE))
9884 compare_code = code;
9885 else if (ix86_compare_op1 == constm1_rtx)
9889 else if (code == GT)
9894 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9895 if (compare_code != NIL
9896 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9897 && (cf == -1 || ct == -1))
9899 /* If lea code below could be used, only optimize
9900 if it results in a 2 insn sequence. */
9902 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9903 || diff == 3 || diff == 5 || diff == 9)
9904 || (compare_code == LT && ct == -1)
9905 || (compare_code == GE && cf == -1))
9908 * notl op1 (if necessary)
9916 code = reverse_condition (code);
9919 out = emit_store_flag (out, code, ix86_compare_op0,
9920 ix86_compare_op1, VOIDmode, 0, -1);
9922 out = expand_simple_binop (mode, IOR,
9924 out, 1, OPTAB_DIRECT);
9925 if (out != operands[0])
9926 emit_move_insn (operands[0], out);
9928 return 1; /* DONE */
9933 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9934 || diff == 3 || diff == 5 || diff == 9)
9935 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9936 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9942 * lea cf(dest*(ct-cf)),dest
9946 * This also catches the degenerate setcc-only case.
9952 out = emit_store_flag (out, code, ix86_compare_op0,
9953 ix86_compare_op1, VOIDmode, 0, 1);
9956 /* On x86_64 the lea instruction operates on Pmode, so we need
9957 to get arithmetics done in proper mode to match. */
9959 tmp = copy_rtx (out);
9963 out1 = copy_rtx (out);
9964 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9968 tmp = gen_rtx_PLUS (mode, tmp, out1);
9974 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9977 if (!rtx_equal_p (tmp, out))
9980 out = force_operand (tmp, copy_rtx (out));
9982 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9984 if (!rtx_equal_p (out, operands[0]))
9985 emit_move_insn (operands[0], copy_rtx (out));
9987 return 1; /* DONE */
9991 * General case: Jumpful:
9992 * xorl dest,dest cmpl op1, op2
9993 * cmpl op1, op2 movl ct, dest
9995 * decl dest movl cf, dest
9996 * andl (cf-ct),dest 1:
10001 * This is reasonably steep, but branch mispredict costs are
10002 * high on modern cpus, so consider failing only if optimizing
10006 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10007 && BRANCH_COST >= 2)
10013 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10014 /* We may be reversing unordered compare to normal compare,
10015 that is not valid in general (we may convert non-trapping
10016 condition to trapping one), however on i386 we currently
10017 emit all comparisons unordered. */
10018 code = reverse_condition_maybe_unordered (code);
10021 code = reverse_condition (code);
10022 if (compare_code != NIL)
10023 compare_code = reverse_condition (compare_code);
10027 if (compare_code != NIL)
10029 /* notl op1 (if needed)
10034 For x < 0 (resp. x <= -1) there will be no notl,
10035 so if possible swap the constants to get rid of the
10037 True/false will be -1/0 while code below (store flag
10038 followed by decrement) is 0/-1, so the constants need
10039 to be exchanged once more. */
10041 if (compare_code == GE || !cf)
10043 code = reverse_condition (code);
10048 HOST_WIDE_INT tmp = cf;
10053 out = emit_store_flag (out, code, ix86_compare_op0,
10054 ix86_compare_op1, VOIDmode, 0, -1);
10058 out = emit_store_flag (out, code, ix86_compare_op0,
10059 ix86_compare_op1, VOIDmode, 0, 1);
10061 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10062 copy_rtx (out), 1, OPTAB_DIRECT);
10065 out = expand_simple_binop (mode, AND, copy_rtx (out),
10066 gen_int_mode (cf - ct, mode),
10067 copy_rtx (out), 1, OPTAB_DIRECT);
10069 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10070 copy_rtx (out), 1, OPTAB_DIRECT);
10071 if (!rtx_equal_p (out, operands[0]))
10072 emit_move_insn (operands[0], copy_rtx (out));
10074 return 1; /* DONE */
10078 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10080 /* Try a few things more with specific constants and a variable. */
10083 rtx var, orig_out, out, tmp;
10085 if (BRANCH_COST <= 2)
10086 return 0; /* FAIL */
10088 /* If one of the two operands is an interesting constant, load a
10089 constant with the above and mask it in with a logical operation. */
10091 if (GET_CODE (operands[2]) == CONST_INT)
10094 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10095 operands[3] = constm1_rtx, op = and_optab;
10096 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10097 operands[3] = const0_rtx, op = ior_optab;
10099 return 0; /* FAIL */
10101 else if (GET_CODE (operands[3]) == CONST_INT)
10104 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10105 operands[2] = constm1_rtx, op = and_optab;
10106 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10107 operands[2] = const0_rtx, op = ior_optab;
10109 return 0; /* FAIL */
10112 return 0; /* FAIL */
10114 orig_out = operands[0];
10115 tmp = gen_reg_rtx (mode);
10118 /* Recurse to get the constant loaded. */
10119 if (ix86_expand_int_movcc (operands) == 0)
10120 return 0; /* FAIL */
10122 /* Mask in the interesting variable. */
10123 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10125 if (!rtx_equal_p (out, orig_out))
10126 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10128 return 1; /* DONE */
10132 * For comparison with above,
10142 if (! nonimmediate_operand (operands[2], mode))
10143 operands[2] = force_reg (mode, operands[2]);
10144 if (! nonimmediate_operand (operands[3], mode))
10145 operands[3] = force_reg (mode, operands[3]);
10147 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10149 rtx tmp = gen_reg_rtx (mode);
10150 emit_move_insn (tmp, operands[3]);
10153 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10155 rtx tmp = gen_reg_rtx (mode);
10156 emit_move_insn (tmp, operands[2]);
10160 if (! register_operand (operands[2], VOIDmode)
10162 || ! register_operand (operands[3], VOIDmode)))
10163 operands[2] = force_reg (mode, operands[2]);
10166 && ! register_operand (operands[3], VOIDmode))
10167 operands[3] = force_reg (mode, operands[3]);
10169 emit_insn (compare_seq);
10170 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10171 gen_rtx_IF_THEN_ELSE (mode,
10172 compare_op, operands[2],
10175 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10176 gen_rtx_IF_THEN_ELSE (mode,
10178 copy_rtx (operands[3]),
10179 copy_rtx (operands[0]))));
10181 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10182 gen_rtx_IF_THEN_ELSE (mode,
10184 copy_rtx (operands[2]),
10185 copy_rtx (operands[0]))));
10187 return 1; /* DONE */
10191 ix86_expand_fp_movcc (operands)
10194 enum rtx_code code;
10196 rtx compare_op, second_test, bypass_test;
10198 /* For SF/DFmode conditional moves based on comparisons
10199 in same mode, we may want to use SSE min/max instructions. */
10200 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10201 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10202 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10203 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10204 && (!TARGET_IEEE_FP
10205 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10206 /* We may be called from the post-reload splitter. */
10207 && (!REG_P (operands[0])
10208 || SSE_REG_P (operands[0])
10209 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10211 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10212 code = GET_CODE (operands[1]);
10214 /* See if we have (cross) match between comparison operands and
10215 conditional move operands. */
10216 if (rtx_equal_p (operands[2], op1))
10221 code = reverse_condition_maybe_unordered (code);
10223 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10225 /* Check for min operation. */
10226 if (code == LT || code == UNLE)
10234 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10235 if (memory_operand (op0, VOIDmode))
10236 op0 = force_reg (GET_MODE (operands[0]), op0);
10237 if (GET_MODE (operands[0]) == SFmode)
10238 emit_insn (gen_minsf3 (operands[0], op0, op1));
10240 emit_insn (gen_mindf3 (operands[0], op0, op1));
10243 /* Check for max operation. */
10244 if (code == GT || code == UNGE)
10252 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10253 if (memory_operand (op0, VOIDmode))
10254 op0 = force_reg (GET_MODE (operands[0]), op0);
10255 if (GET_MODE (operands[0]) == SFmode)
10256 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10258 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10262 /* Manage condition to be sse_comparison_operator. In case we are
10263 in non-ieee mode, try to canonicalize the destination operand
10264 to be first in the comparison - this helps reload to avoid extra
10266 if (!sse_comparison_operator (operands[1], VOIDmode)
10267 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10269 rtx tmp = ix86_compare_op0;
10270 ix86_compare_op0 = ix86_compare_op1;
10271 ix86_compare_op1 = tmp;
10272 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10273 VOIDmode, ix86_compare_op0,
10276 /* Similarly try to manage result to be first operand of conditional
10277 move. We also don't support the NE comparison on SSE, so try to
10279 if ((rtx_equal_p (operands[0], operands[3])
10280 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10281 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10283 rtx tmp = operands[2];
10284 operands[2] = operands[3];
10286 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10287 (GET_CODE (operands[1])),
10288 VOIDmode, ix86_compare_op0,
10291 if (GET_MODE (operands[0]) == SFmode)
10292 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10293 operands[2], operands[3],
10294 ix86_compare_op0, ix86_compare_op1));
10296 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10297 operands[2], operands[3],
10298 ix86_compare_op0, ix86_compare_op1));
10302 /* The floating point conditional move instructions don't directly
10303 support conditions resulting from a signed integer comparison. */
10305 code = GET_CODE (operands[1]);
10306 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10308 /* The floating point conditional move instructions don't directly
10309 support signed integer comparisons. */
10311 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10313 if (second_test != NULL || bypass_test != NULL)
10315 tmp = gen_reg_rtx (QImode);
10316 ix86_expand_setcc (code, tmp);
10318 ix86_compare_op0 = tmp;
10319 ix86_compare_op1 = const0_rtx;
10320 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10322 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10324 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10325 emit_move_insn (tmp, operands[3]);
10328 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10330 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10331 emit_move_insn (tmp, operands[2]);
10335 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10336 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10341 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10342 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10347 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10348 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10356 /* Expand conditional increment or decrement using adb/sbb instructions.
10357 The default case using setcc followed by the conditional move can be
10358 done by generic code. */
10360 ix86_expand_int_addcc (operands)
10363 enum rtx_code code = GET_CODE (operands[1]);
10365 rtx val = const0_rtx;
10366 bool fpcmp = false;
10367 enum machine_mode mode = GET_MODE (operands[0]);
10369 if (operands[3] != const1_rtx
10370 && operands[3] != constm1_rtx)
10372 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10373 ix86_compare_op1, &compare_op))
10375 code = GET_CODE (compare_op);
10377 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10378 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10381 code = ix86_fp_compare_code_to_integer (code);
10388 PUT_CODE (compare_op,
10389 reverse_condition_maybe_unordered
10390 (GET_CODE (compare_op)));
10392 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10394 PUT_MODE (compare_op, mode);
10396 /* Construct either adc or sbb insn. */
10397 if ((code == LTU) == (operands[3] == constm1_rtx))
10399 switch (GET_MODE (operands[0]))
10402 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10405 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10408 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10411 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10419 switch (GET_MODE (operands[0]))
10422 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10425 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10428 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10431 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10437 return 1; /* DONE */
10441 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10442 works for floating pointer parameters and nonoffsetable memories.
10443 For pushes, it returns just stack offsets; the values will be saved
10444 in the right order. Maximally three parts are generated. */
10447 ix86_split_to_parts (operand, parts, mode)
10450 enum machine_mode mode;
10455 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10457 size = (GET_MODE_SIZE (mode) + 4) / 8;
10459 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10461 if (size < 2 || size > 3)
10464 /* Optimize constant pool reference to immediates. This is used by fp
10465 moves, that force all constants to memory to allow combining. */
10466 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10468 rtx tmp = maybe_get_pool_constant (operand);
10473 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10475 /* The only non-offsetable memories we handle are pushes. */
10476 if (! push_operand (operand, VOIDmode))
10479 operand = copy_rtx (operand);
10480 PUT_MODE (operand, Pmode);
10481 parts[0] = parts[1] = parts[2] = operand;
10483 else if (!TARGET_64BIT)
10485 if (mode == DImode)
10486 split_di (&operand, 1, &parts[0], &parts[1]);
10489 if (REG_P (operand))
10491 if (!reload_completed)
10493 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10494 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10496 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10498 else if (offsettable_memref_p (operand))
10500 operand = adjust_address (operand, SImode, 0);
10501 parts[0] = operand;
10502 parts[1] = adjust_address (operand, SImode, 4);
10504 parts[2] = adjust_address (operand, SImode, 8);
10506 else if (GET_CODE (operand) == CONST_DOUBLE)
10511 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10516 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10517 parts[2] = gen_int_mode (l[2], SImode);
10520 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10525 parts[1] = gen_int_mode (l[1], SImode);
10526 parts[0] = gen_int_mode (l[0], SImode);
10534 if (mode == TImode)
10535 split_ti (&operand, 1, &parts[0], &parts[1]);
10536 if (mode == XFmode || mode == TFmode)
10538 if (REG_P (operand))
10540 if (!reload_completed)
10542 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10543 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10545 else if (offsettable_memref_p (operand))
10547 operand = adjust_address (operand, DImode, 0);
10548 parts[0] = operand;
10549 parts[1] = adjust_address (operand, SImode, 8);
10551 else if (GET_CODE (operand) == CONST_DOUBLE)
10556 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10557 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10558 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10559 if (HOST_BITS_PER_WIDE_INT >= 64)
10562 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10563 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10566 parts[0] = immed_double_const (l[0], l[1], DImode);
10567 parts[1] = gen_int_mode (l[2], SImode);
10577 /* Emit insns to perform a move or push of DI, DF, and XF values.
10578 Return false when normal moves are needed; true when all required
10579 insns have been emitted. Operands 2-4 contain the input values
10580 int the correct order; operands 5-7 contain the output values. */
10583 ix86_split_long_move (operands)
10589 int collisions = 0;
10590 enum machine_mode mode = GET_MODE (operands[0]);
10592 /* The DFmode expanders may ask us to move double.
10593 For 64bit target this is single move. By hiding the fact
10594 here we simplify i386.md splitters. */
10595 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10597 /* Optimize constant pool reference to immediates. This is used by
10598 fp moves, that force all constants to memory to allow combining. */
10600 if (GET_CODE (operands[1]) == MEM
10601 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10602 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10603 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10604 if (push_operand (operands[0], VOIDmode))
10606 operands[0] = copy_rtx (operands[0]);
10607 PUT_MODE (operands[0], Pmode);
10610 operands[0] = gen_lowpart (DImode, operands[0]);
10611 operands[1] = gen_lowpart (DImode, operands[1]);
10612 emit_move_insn (operands[0], operands[1]);
10616 /* The only non-offsettable memory we handle is push. */
10617 if (push_operand (operands[0], VOIDmode))
10619 else if (GET_CODE (operands[0]) == MEM
10620 && ! offsettable_memref_p (operands[0]))
10623 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10624 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10626 /* When emitting push, take care for source operands on the stack. */
10627 if (push && GET_CODE (operands[1]) == MEM
10628 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10631 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10632 XEXP (part[1][2], 0));
10633 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10634 XEXP (part[1][1], 0));
10637 /* We need to do copy in the right order in case an address register
10638 of the source overlaps the destination. */
10639 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10641 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10643 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10646 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10649 /* Collision in the middle part can be handled by reordering. */
10650 if (collisions == 1 && nparts == 3
10651 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10654 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10655 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10658 /* If there are more collisions, we can't handle it by reordering.
10659 Do an lea to the last part and use only one colliding move. */
10660 else if (collisions > 1)
10666 base = part[0][nparts - 1];
10668 /* Handle the case when the last part isn't valid for lea.
10669 Happens in 64-bit mode storing the 12-byte XFmode. */
10670 if (GET_MODE (base) != Pmode)
10671 base = gen_rtx_REG (Pmode, REGNO (base));
10673 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10674 part[1][0] = replace_equiv_address (part[1][0], base);
10675 part[1][1] = replace_equiv_address (part[1][1],
10676 plus_constant (base, UNITS_PER_WORD));
10678 part[1][2] = replace_equiv_address (part[1][2],
10679 plus_constant (base, 8));
10689 /* We use only first 12 bytes of TFmode value, but for pushing we
10690 are required to adjust stack as if we were pushing real 16byte
10692 if (mode == TFmode && !TARGET_64BIT)
10693 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10695 emit_move_insn (part[0][2], part[1][2]);
10700 /* In 64bit mode we don't have 32bit push available. In case this is
10701 register, it is OK - we will just use larger counterpart. We also
10702 retype memory - these comes from attempt to avoid REX prefix on
10703 moving of second half of TFmode value. */
10704 if (GET_MODE (part[1][1]) == SImode)
10706 if (GET_CODE (part[1][1]) == MEM)
10707 part[1][1] = adjust_address (part[1][1], DImode, 0);
10708 else if (REG_P (part[1][1]))
10709 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10712 if (GET_MODE (part[1][0]) == SImode)
10713 part[1][0] = part[1][1];
10716 emit_move_insn (part[0][1], part[1][1]);
10717 emit_move_insn (part[0][0], part[1][0]);
10721 /* Choose correct order to not overwrite the source before it is copied. */
10722 if ((REG_P (part[0][0])
10723 && REG_P (part[1][1])
10724 && (REGNO (part[0][0]) == REGNO (part[1][1])
10726 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10728 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10732 operands[2] = part[0][2];
10733 operands[3] = part[0][1];
10734 operands[4] = part[0][0];
10735 operands[5] = part[1][2];
10736 operands[6] = part[1][1];
10737 operands[7] = part[1][0];
10741 operands[2] = part[0][1];
10742 operands[3] = part[0][0];
10743 operands[5] = part[1][1];
10744 operands[6] = part[1][0];
10751 operands[2] = part[0][0];
10752 operands[3] = part[0][1];
10753 operands[4] = part[0][2];
10754 operands[5] = part[1][0];
10755 operands[6] = part[1][1];
10756 operands[7] = part[1][2];
10760 operands[2] = part[0][0];
10761 operands[3] = part[0][1];
10762 operands[5] = part[1][0];
10763 operands[6] = part[1][1];
10766 emit_move_insn (operands[2], operands[5]);
10767 emit_move_insn (operands[3], operands[6]);
10769 emit_move_insn (operands[4], operands[7]);
10775 ix86_split_ashldi (operands, scratch)
10776 rtx *operands, scratch;
10778 rtx low[2], high[2];
10781 if (GET_CODE (operands[2]) == CONST_INT)
10783 split_di (operands, 2, low, high);
10784 count = INTVAL (operands[2]) & 63;
10788 emit_move_insn (high[0], low[1]);
10789 emit_move_insn (low[0], const0_rtx);
10792 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10796 if (!rtx_equal_p (operands[0], operands[1]))
10797 emit_move_insn (operands[0], operands[1]);
10798 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10799 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10804 if (!rtx_equal_p (operands[0], operands[1]))
10805 emit_move_insn (operands[0], operands[1]);
10807 split_di (operands, 1, low, high);
10809 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10810 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10812 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10814 if (! no_new_pseudos)
10815 scratch = force_reg (SImode, const0_rtx);
10817 emit_move_insn (scratch, const0_rtx);
10819 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10823 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10828 ix86_split_ashrdi (operands, scratch)
10829 rtx *operands, scratch;
10831 rtx low[2], high[2];
10834 if (GET_CODE (operands[2]) == CONST_INT)
10836 split_di (operands, 2, low, high);
10837 count = INTVAL (operands[2]) & 63;
10841 emit_move_insn (low[0], high[1]);
10843 if (! reload_completed)
10844 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10847 emit_move_insn (high[0], low[0]);
10848 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10852 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10856 if (!rtx_equal_p (operands[0], operands[1]))
10857 emit_move_insn (operands[0], operands[1]);
10858 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10859 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10864 if (!rtx_equal_p (operands[0], operands[1]))
10865 emit_move_insn (operands[0], operands[1]);
10867 split_di (operands, 1, low, high);
10869 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10870 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10872 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10874 if (! no_new_pseudos)
10875 scratch = gen_reg_rtx (SImode);
10876 emit_move_insn (scratch, high[0]);
10877 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10878 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10882 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10887 ix86_split_lshrdi (operands, scratch)
10888 rtx *operands, scratch;
10890 rtx low[2], high[2];
10893 if (GET_CODE (operands[2]) == CONST_INT)
10895 split_di (operands, 2, low, high);
10896 count = INTVAL (operands[2]) & 63;
10900 emit_move_insn (low[0], high[1]);
10901 emit_move_insn (high[0], const0_rtx);
10904 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10908 if (!rtx_equal_p (operands[0], operands[1]))
10909 emit_move_insn (operands[0], operands[1]);
10910 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10911 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10916 if (!rtx_equal_p (operands[0], operands[1]))
10917 emit_move_insn (operands[0], operands[1]);
10919 split_di (operands, 1, low, high);
10921 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10922 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10924 /* Heh. By reversing the arguments, we can reuse this pattern. */
10925 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10927 if (! no_new_pseudos)
10928 scratch = force_reg (SImode, const0_rtx);
10930 emit_move_insn (scratch, const0_rtx);
10932 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10936 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10940 /* Helper function for the string operations below. Dest VARIABLE whether
10941 it is aligned to VALUE bytes. If true, jump to the label. */
10943 ix86_expand_aligntest (variable, value)
10947 rtx label = gen_label_rtx ();
10948 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10949 if (GET_MODE (variable) == DImode)
10950 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10952 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10953 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10958 /* Adjust COUNTER by the VALUE. */
10960 ix86_adjust_counter (countreg, value)
10962 HOST_WIDE_INT value;
10964 if (GET_MODE (countreg) == DImode)
10965 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10967 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10970 /* Zero extend possibly SImode EXP to Pmode register. */
10972 ix86_zero_extend_to_Pmode (exp)
10976 if (GET_MODE (exp) == VOIDmode)
10977 return force_reg (Pmode, exp);
10978 if (GET_MODE (exp) == Pmode)
10979 return copy_to_mode_reg (Pmode, exp);
10980 r = gen_reg_rtx (Pmode);
10981 emit_insn (gen_zero_extendsidi2 (r, exp));
10985 /* Expand string move (memcpy) operation. Use i386 string operations when
10986 profitable. expand_clrstr contains similar code. */
10988 ix86_expand_movstr (dst, src, count_exp, align_exp)
10989 rtx dst, src, count_exp, align_exp;
10991 rtx srcreg, destreg, countreg;
10992 enum machine_mode counter_mode;
10993 HOST_WIDE_INT align = 0;
10994 unsigned HOST_WIDE_INT count = 0;
10997 if (GET_CODE (align_exp) == CONST_INT)
10998 align = INTVAL (align_exp);
11000 /* Can't use any of this if the user has appropriated esi or edi. */
11001 if (global_regs[4] || global_regs[5])
11004 /* This simple hack avoids all inlining code and simplifies code below. */
11005 if (!TARGET_ALIGN_STRINGOPS)
11008 if (GET_CODE (count_exp) == CONST_INT)
11010 count = INTVAL (count_exp);
11011 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11015 /* Figure out proper mode for counter. For 32bits it is always SImode,
11016 for 64bits use SImode when possible, otherwise DImode.
11017 Set count to number of bytes copied when known at compile time. */
11018 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11019 || x86_64_zero_extended_value (count_exp))
11020 counter_mode = SImode;
11022 counter_mode = DImode;
11026 if (counter_mode != SImode && counter_mode != DImode)
11029 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11030 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11032 emit_insn (gen_cld ());
11034 /* When optimizing for size emit simple rep ; movsb instruction for
11035 counts not divisible by 4. */
11037 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11039 countreg = ix86_zero_extend_to_Pmode (count_exp);
11041 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11042 destreg, srcreg, countreg));
11044 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11045 destreg, srcreg, countreg));
11048 /* For constant aligned (or small unaligned) copies use rep movsl
11049 followed by code copying the rest. For PentiumPro ensure 8 byte
11050 alignment to allow rep movsl acceleration. */
11052 else if (count != 0
11054 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11055 || optimize_size || count < (unsigned int) 64))
11057 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11058 if (count & ~(size - 1))
11060 countreg = copy_to_mode_reg (counter_mode,
11061 GEN_INT ((count >> (size == 4 ? 2 : 3))
11062 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11063 countreg = ix86_zero_extend_to_Pmode (countreg);
11067 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11068 destreg, srcreg, countreg));
11070 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11071 destreg, srcreg, countreg));
11074 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11075 destreg, srcreg, countreg));
11077 if (size == 8 && (count & 0x04))
11078 emit_insn (gen_strmovsi (destreg, srcreg));
11080 emit_insn (gen_strmovhi (destreg, srcreg));
11082 emit_insn (gen_strmovqi (destreg, srcreg));
11084 /* The generic code based on the glibc implementation:
11085 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11086 allowing accelerated copying there)
11087 - copy the data using rep movsl
11088 - copy the rest. */
11093 int desired_alignment = (TARGET_PENTIUMPRO
11094 && (count == 0 || count >= (unsigned int) 260)
11095 ? 8 : UNITS_PER_WORD);
11097 /* In case we don't know anything about the alignment, default to
11098 library version, since it is usually equally fast and result in
11101 Also emit call when we know that the count is large and call overhead
11102 will not be important. */
11103 if (!TARGET_INLINE_ALL_STRINGOPS
11104 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11110 if (TARGET_SINGLE_STRINGOP)
11111 emit_insn (gen_cld ());
11113 countreg2 = gen_reg_rtx (Pmode);
11114 countreg = copy_to_mode_reg (counter_mode, count_exp);
11116 /* We don't use loops to align destination and to copy parts smaller
11117 than 4 bytes, because gcc is able to optimize such code better (in
11118 the case the destination or the count really is aligned, gcc is often
11119 able to predict the branches) and also it is friendlier to the
11120 hardware branch prediction.
11122 Using loops is beneficial for generic case, because we can
11123 handle small counts using the loops. Many CPUs (such as Athlon)
11124 have large REP prefix setup costs.
11126 This is quite costly. Maybe we can revisit this decision later or
11127 add some customizability to this code. */
11129 if (count == 0 && align < desired_alignment)
11131 label = gen_label_rtx ();
11132 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11133 LEU, 0, counter_mode, 1, label);
11137 rtx label = ix86_expand_aligntest (destreg, 1);
11138 emit_insn (gen_strmovqi (destreg, srcreg));
11139 ix86_adjust_counter (countreg, 1);
11140 emit_label (label);
11141 LABEL_NUSES (label) = 1;
11145 rtx label = ix86_expand_aligntest (destreg, 2);
11146 emit_insn (gen_strmovhi (destreg, srcreg));
11147 ix86_adjust_counter (countreg, 2);
11148 emit_label (label);
11149 LABEL_NUSES (label) = 1;
11151 if (align <= 4 && desired_alignment > 4)
11153 rtx label = ix86_expand_aligntest (destreg, 4);
11154 emit_insn (gen_strmovsi (destreg, srcreg));
11155 ix86_adjust_counter (countreg, 4);
11156 emit_label (label);
11157 LABEL_NUSES (label) = 1;
11160 if (label && desired_alignment > 4 && !TARGET_64BIT)
11162 emit_label (label);
11163 LABEL_NUSES (label) = 1;
11166 if (!TARGET_SINGLE_STRINGOP)
11167 emit_insn (gen_cld ());
11170 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11172 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11173 destreg, srcreg, countreg2));
11177 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11178 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11179 destreg, srcreg, countreg2));
11184 emit_label (label);
11185 LABEL_NUSES (label) = 1;
11187 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11188 emit_insn (gen_strmovsi (destreg, srcreg));
11189 if ((align <= 4 || count == 0) && TARGET_64BIT)
11191 rtx label = ix86_expand_aligntest (countreg, 4);
11192 emit_insn (gen_strmovsi (destreg, srcreg));
11193 emit_label (label);
11194 LABEL_NUSES (label) = 1;
11196 if (align > 2 && count != 0 && (count & 2))
11197 emit_insn (gen_strmovhi (destreg, srcreg));
11198 if (align <= 2 || count == 0)
11200 rtx label = ix86_expand_aligntest (countreg, 2);
11201 emit_insn (gen_strmovhi (destreg, srcreg));
11202 emit_label (label);
11203 LABEL_NUSES (label) = 1;
11205 if (align > 1 && count != 0 && (count & 1))
11206 emit_insn (gen_strmovqi (destreg, srcreg));
11207 if (align <= 1 || count == 0)
11209 rtx label = ix86_expand_aligntest (countreg, 1);
11210 emit_insn (gen_strmovqi (destreg, srcreg));
11211 emit_label (label);
11212 LABEL_NUSES (label) = 1;
11216 insns = get_insns ();
11219 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11224 /* Expand string clear operation (bzero). Use i386 string operations when
11225 profitable. expand_movstr contains similar code. */
11227 ix86_expand_clrstr (src, count_exp, align_exp)
11228 rtx src, count_exp, align_exp;
11230 rtx destreg, zeroreg, countreg;
11231 enum machine_mode counter_mode;
11232 HOST_WIDE_INT align = 0;
11233 unsigned HOST_WIDE_INT count = 0;
11235 if (GET_CODE (align_exp) == CONST_INT)
11236 align = INTVAL (align_exp);
11238 /* Can't use any of this if the user has appropriated esi. */
11239 if (global_regs[4])
11242 /* This simple hack avoids all inlining code and simplifies code below. */
11243 if (!TARGET_ALIGN_STRINGOPS)
11246 if (GET_CODE (count_exp) == CONST_INT)
11248 count = INTVAL (count_exp);
11249 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11252 /* Figure out proper mode for counter. For 32bits it is always SImode,
11253 for 64bits use SImode when possible, otherwise DImode.
11254 Set count to number of bytes copied when known at compile time. */
11255 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11256 || x86_64_zero_extended_value (count_exp))
11257 counter_mode = SImode;
11259 counter_mode = DImode;
11261 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11263 emit_insn (gen_cld ());
11265 /* When optimizing for size emit simple rep ; movsb instruction for
11266 counts not divisible by 4. */
11268 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11270 countreg = ix86_zero_extend_to_Pmode (count_exp);
11271 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11273 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11274 destreg, countreg));
11276 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11277 destreg, countreg));
11279 else if (count != 0
11281 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11282 || optimize_size || count < (unsigned int) 64))
11284 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11285 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11286 if (count & ~(size - 1))
11288 countreg = copy_to_mode_reg (counter_mode,
11289 GEN_INT ((count >> (size == 4 ? 2 : 3))
11290 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11291 countreg = ix86_zero_extend_to_Pmode (countreg);
11295 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11296 destreg, countreg));
11298 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11299 destreg, countreg));
11302 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11303 destreg, countreg));
11305 if (size == 8 && (count & 0x04))
11306 emit_insn (gen_strsetsi (destreg,
11307 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11309 emit_insn (gen_strsethi (destreg,
11310 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11312 emit_insn (gen_strsetqi (destreg,
11313 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11319 /* Compute desired alignment of the string operation. */
11320 int desired_alignment = (TARGET_PENTIUMPRO
11321 && (count == 0 || count >= (unsigned int) 260)
11322 ? 8 : UNITS_PER_WORD);
11324 /* In case we don't know anything about the alignment, default to
11325 library version, since it is usually equally fast and result in
11328 Also emit call when we know that the count is large and call overhead
11329 will not be important. */
11330 if (!TARGET_INLINE_ALL_STRINGOPS
11331 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11334 if (TARGET_SINGLE_STRINGOP)
11335 emit_insn (gen_cld ());
11337 countreg2 = gen_reg_rtx (Pmode);
11338 countreg = copy_to_mode_reg (counter_mode, count_exp);
11339 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11341 if (count == 0 && align < desired_alignment)
11343 label = gen_label_rtx ();
11344 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11345 LEU, 0, counter_mode, 1, label);
11349 rtx label = ix86_expand_aligntest (destreg, 1);
11350 emit_insn (gen_strsetqi (destreg,
11351 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11352 ix86_adjust_counter (countreg, 1);
11353 emit_label (label);
11354 LABEL_NUSES (label) = 1;
11358 rtx label = ix86_expand_aligntest (destreg, 2);
11359 emit_insn (gen_strsethi (destreg,
11360 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11361 ix86_adjust_counter (countreg, 2);
11362 emit_label (label);
11363 LABEL_NUSES (label) = 1;
11365 if (align <= 4 && desired_alignment > 4)
11367 rtx label = ix86_expand_aligntest (destreg, 4);
11368 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11369 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11371 ix86_adjust_counter (countreg, 4);
11372 emit_label (label);
11373 LABEL_NUSES (label) = 1;
11376 if (label && desired_alignment > 4 && !TARGET_64BIT)
11378 emit_label (label);
11379 LABEL_NUSES (label) = 1;
11383 if (!TARGET_SINGLE_STRINGOP)
11384 emit_insn (gen_cld ());
11387 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11389 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11390 destreg, countreg2));
11394 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11395 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11396 destreg, countreg2));
11400 emit_label (label);
11401 LABEL_NUSES (label) = 1;
11404 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11405 emit_insn (gen_strsetsi (destreg,
11406 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11407 if (TARGET_64BIT && (align <= 4 || count == 0))
11409 rtx label = ix86_expand_aligntest (countreg, 4);
11410 emit_insn (gen_strsetsi (destreg,
11411 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11412 emit_label (label);
11413 LABEL_NUSES (label) = 1;
11415 if (align > 2 && count != 0 && (count & 2))
11416 emit_insn (gen_strsethi (destreg,
11417 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11418 if (align <= 2 || count == 0)
11420 rtx label = ix86_expand_aligntest (countreg, 2);
11421 emit_insn (gen_strsethi (destreg,
11422 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11423 emit_label (label);
11424 LABEL_NUSES (label) = 1;
11426 if (align > 1 && count != 0 && (count & 1))
11427 emit_insn (gen_strsetqi (destreg,
11428 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11429 if (align <= 1 || count == 0)
11431 rtx label = ix86_expand_aligntest (countreg, 1);
11432 emit_insn (gen_strsetqi (destreg,
11433 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11434 emit_label (label);
11435 LABEL_NUSES (label) = 1;
11440 /* Expand strlen. */
11442 ix86_expand_strlen (out, src, eoschar, align)
11443 rtx out, src, eoschar, align;
11445 rtx addr, scratch1, scratch2, scratch3, scratch4;
11447 /* The generic case of strlen expander is long. Avoid it's
11448 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11450 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11451 && !TARGET_INLINE_ALL_STRINGOPS
11453 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11456 addr = force_reg (Pmode, XEXP (src, 0));
11457 scratch1 = gen_reg_rtx (Pmode);
11459 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11462 /* Well it seems that some optimizer does not combine a call like
11463 foo(strlen(bar), strlen(bar));
11464 when the move and the subtraction is done here. It does calculate
11465 the length just once when these instructions are done inside of
11466 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11467 often used and I use one fewer register for the lifetime of
11468 output_strlen_unroll() this is better. */
11470 emit_move_insn (out, addr);
11472 ix86_expand_strlensi_unroll_1 (out, align);
11474 /* strlensi_unroll_1 returns the address of the zero at the end of
11475 the string, like memchr(), so compute the length by subtracting
11476 the start address. */
11478 emit_insn (gen_subdi3 (out, out, addr));
11480 emit_insn (gen_subsi3 (out, out, addr));
11484 scratch2 = gen_reg_rtx (Pmode);
11485 scratch3 = gen_reg_rtx (Pmode);
11486 scratch4 = force_reg (Pmode, constm1_rtx);
11488 emit_move_insn (scratch3, addr);
11489 eoschar = force_reg (QImode, eoschar);
11491 emit_insn (gen_cld ());
11494 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11495 align, scratch4, scratch3));
11496 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11497 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11501 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11502 align, scratch4, scratch3));
11503 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11504 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11510 /* Expand the appropriate insns for doing strlen if not just doing
11513 out = result, initialized with the start address
11514 align_rtx = alignment of the address.
11515 scratch = scratch register, initialized with the startaddress when
11516 not aligned, otherwise undefined
11518 This is just the body. It needs the initialisations mentioned above and
11519 some address computing at the end. These things are done in i386.md. */
11522 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11523 rtx out, align_rtx;
11527 rtx align_2_label = NULL_RTX;
11528 rtx align_3_label = NULL_RTX;
11529 rtx align_4_label = gen_label_rtx ();
11530 rtx end_0_label = gen_label_rtx ();
11532 rtx tmpreg = gen_reg_rtx (SImode);
11533 rtx scratch = gen_reg_rtx (SImode);
11537 if (GET_CODE (align_rtx) == CONST_INT)
11538 align = INTVAL (align_rtx);
11540 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11542 /* Is there a known alignment and is it less than 4? */
11545 rtx scratch1 = gen_reg_rtx (Pmode);
11546 emit_move_insn (scratch1, out);
11547 /* Is there a known alignment and is it not 2? */
11550 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11551 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11553 /* Leave just the 3 lower bits. */
11554 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11555 NULL_RTX, 0, OPTAB_WIDEN);
11557 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11558 Pmode, 1, align_4_label);
11559 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11560 Pmode, 1, align_2_label);
11561 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11562 Pmode, 1, align_3_label);
11566 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11567 check if is aligned to 4 - byte. */
11569 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11570 NULL_RTX, 0, OPTAB_WIDEN);
11572 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11573 Pmode, 1, align_4_label);
11576 mem = gen_rtx_MEM (QImode, out);
11578 /* Now compare the bytes. */
11580 /* Compare the first n unaligned byte on a byte per byte basis. */
11581 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11582 QImode, 1, end_0_label);
11584 /* Increment the address. */
11586 emit_insn (gen_adddi3 (out, out, const1_rtx));
11588 emit_insn (gen_addsi3 (out, out, const1_rtx));
11590 /* Not needed with an alignment of 2 */
11593 emit_label (align_2_label);
11595 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11599 emit_insn (gen_adddi3 (out, out, const1_rtx));
11601 emit_insn (gen_addsi3 (out, out, const1_rtx));
11603 emit_label (align_3_label);
11606 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11610 emit_insn (gen_adddi3 (out, out, const1_rtx));
11612 emit_insn (gen_addsi3 (out, out, const1_rtx));
11615 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11616 align this loop. It gives only huge programs, but does not help to
11618 emit_label (align_4_label);
11620 mem = gen_rtx_MEM (SImode, out);
11621 emit_move_insn (scratch, mem);
11623 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11625 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11627 /* This formula yields a nonzero result iff one of the bytes is zero.
11628 This saves three branches inside loop and many cycles. */
11630 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11631 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11632 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11633 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11634 gen_int_mode (0x80808080, SImode)));
11635 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11640 rtx reg = gen_reg_rtx (SImode);
11641 rtx reg2 = gen_reg_rtx (Pmode);
11642 emit_move_insn (reg, tmpreg);
11643 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11645 /* If zero is not in the first two bytes, move two bytes forward. */
11646 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11647 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11648 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11649 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11650 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11653 /* Emit lea manually to avoid clobbering of flags. */
11654 emit_insn (gen_rtx_SET (SImode, reg2,
11655 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11657 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11658 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11659 emit_insn (gen_rtx_SET (VOIDmode, out,
11660 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11667 rtx end_2_label = gen_label_rtx ();
11668 /* Is zero in the first two bytes? */
11670 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11671 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11672 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11673 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11674 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11676 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11677 JUMP_LABEL (tmp) = end_2_label;
11679 /* Not in the first two. Move two bytes forward. */
11680 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11682 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11684 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11686 emit_label (end_2_label);
11690 /* Avoid branch in fixing the byte. */
11691 tmpreg = gen_lowpart (QImode, tmpreg);
11692 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11693 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11695 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11697 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11699 emit_label (end_0_label);
11703 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11704 rtx retval, fnaddr, callarg1, callarg2, pop;
11707 rtx use = NULL, call;
11709 if (pop == const0_rtx)
11711 if (TARGET_64BIT && pop)
11715 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11716 fnaddr = machopic_indirect_call_target (fnaddr);
11718 /* Static functions and indirect calls don't need the pic register. */
11719 if (! TARGET_64BIT && flag_pic
11720 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11721 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11722 use_reg (&use, pic_offset_table_rtx);
11724 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11726 rtx al = gen_rtx_REG (QImode, 0);
11727 emit_move_insn (al, callarg2);
11728 use_reg (&use, al);
11730 #endif /* TARGET_MACHO */
11732 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11734 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11735 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11737 if (sibcall && TARGET_64BIT
11738 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11741 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11742 fnaddr = gen_rtx_REG (Pmode, 40);
11743 emit_move_insn (fnaddr, addr);
11744 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11747 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11749 call = gen_rtx_SET (VOIDmode, retval, call);
11752 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11753 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11754 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11757 call = emit_call_insn (call);
11759 CALL_INSN_FUNCTION_USAGE (call) = use;
11763 /* Clear stack slot assignments remembered from previous functions.
11764 This is called from INIT_EXPANDERS once before RTL is emitted for each
11767 static struct machine_function *
11768 ix86_init_machine_status ()
11770 struct machine_function *f;
11772 f = ggc_alloc_cleared (sizeof (struct machine_function));
11773 f->use_fast_prologue_epilogue_nregs = -1;
11778 /* Return a MEM corresponding to a stack slot with mode MODE.
11779 Allocate a new slot if necessary.
11781 The RTL for a function can have several slots available: N is
11782 which slot to use. */
11785 assign_386_stack_local (mode, n)
11786 enum machine_mode mode;
11789 struct stack_local_entry *s;
11791 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11794 for (s = ix86_stack_locals; s; s = s->next)
11795 if (s->mode == mode && s->n == n)
11798 s = (struct stack_local_entry *)
11799 ggc_alloc (sizeof (struct stack_local_entry));
11802 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11804 s->next = ix86_stack_locals;
11805 ix86_stack_locals = s;
11809 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11811 static GTY(()) rtx ix86_tls_symbol;
11813 ix86_tls_get_addr ()
11816 if (!ix86_tls_symbol)
11818 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11819 (TARGET_GNU_TLS && !TARGET_64BIT)
11820 ? "___tls_get_addr"
11821 : "__tls_get_addr");
11824 return ix86_tls_symbol;
11827 /* Calculate the length of the memory address in the instruction
11828 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11831 memory_address_length (addr)
11834 struct ix86_address parts;
11835 rtx base, index, disp;
11838 if (GET_CODE (addr) == PRE_DEC
11839 || GET_CODE (addr) == POST_INC
11840 || GET_CODE (addr) == PRE_MODIFY
11841 || GET_CODE (addr) == POST_MODIFY)
11844 if (! ix86_decompose_address (addr, &parts))
11848 index = parts.index;
11852 /* Register Indirect. */
11853 if (base && !index && !disp)
11855 /* Special cases: ebp and esp need the two-byte modrm form. */
11856 if (addr == stack_pointer_rtx
11857 || addr == arg_pointer_rtx
11858 || addr == frame_pointer_rtx
11859 || addr == hard_frame_pointer_rtx)
11863 /* Direct Addressing. */
11864 else if (disp && !base && !index)
11869 /* Find the length of the displacement constant. */
11872 if (GET_CODE (disp) == CONST_INT
11873 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11880 /* An index requires the two-byte modrm form. */
11888 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11889 is set, expect that insn have 8bit immediate alternative. */
11891 ix86_attr_length_immediate_default (insn, shortform)
11897 extract_insn_cached (insn);
11898 for (i = recog_data.n_operands - 1; i >= 0; --i)
11899 if (CONSTANT_P (recog_data.operand[i]))
11904 && GET_CODE (recog_data.operand[i]) == CONST_INT
11905 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11909 switch (get_attr_mode (insn))
11920 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11925 fatal_insn ("unknown insn mode", insn);
11931 /* Compute default value for "length_address" attribute. */
11933 ix86_attr_length_address_default (insn)
11938 if (get_attr_type (insn) == TYPE_LEA)
11940 rtx set = PATTERN (insn);
11941 if (GET_CODE (set) == SET)
11943 else if (GET_CODE (set) == PARALLEL
11944 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11945 set = XVECEXP (set, 0, 0);
11948 #ifdef ENABLE_CHECKING
11954 return memory_address_length (SET_SRC (set));
11957 extract_insn_cached (insn);
11958 for (i = recog_data.n_operands - 1; i >= 0; --i)
11959 if (GET_CODE (recog_data.operand[i]) == MEM)
11961 return memory_address_length (XEXP (recog_data.operand[i], 0));
11967 /* Return the maximum number of instructions a cpu can issue. */
11974 case PROCESSOR_PENTIUM:
11978 case PROCESSOR_PENTIUMPRO:
11979 case PROCESSOR_PENTIUM4:
11980 case PROCESSOR_ATHLON:
11989 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11990 by DEP_INSN and nothing set by DEP_INSN. */
11993 ix86_flags_dependant (insn, dep_insn, insn_type)
11994 rtx insn, dep_insn;
11995 enum attr_type insn_type;
11999 /* Simplify the test for uninteresting insns. */
12000 if (insn_type != TYPE_SETCC
12001 && insn_type != TYPE_ICMOV
12002 && insn_type != TYPE_FCMOV
12003 && insn_type != TYPE_IBR)
12006 if ((set = single_set (dep_insn)) != 0)
12008 set = SET_DEST (set);
12011 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12012 && XVECLEN (PATTERN (dep_insn), 0) == 2
12013 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12014 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12016 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12017 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12022 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12025 /* This test is true if the dependent insn reads the flags but
12026 not any other potentially set register. */
12027 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12030 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12036 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12037 address with operands set by DEP_INSN. */
12040 ix86_agi_dependant (insn, dep_insn, insn_type)
12041 rtx insn, dep_insn;
12042 enum attr_type insn_type;
12046 if (insn_type == TYPE_LEA
12049 addr = PATTERN (insn);
12050 if (GET_CODE (addr) == SET)
12052 else if (GET_CODE (addr) == PARALLEL
12053 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12054 addr = XVECEXP (addr, 0, 0);
12057 addr = SET_SRC (addr);
12062 extract_insn_cached (insn);
12063 for (i = recog_data.n_operands - 1; i >= 0; --i)
12064 if (GET_CODE (recog_data.operand[i]) == MEM)
12066 addr = XEXP (recog_data.operand[i], 0);
12073 return modified_in_p (addr, dep_insn);
12077 ix86_adjust_cost (insn, link, dep_insn, cost)
12078 rtx insn, link, dep_insn;
12081 enum attr_type insn_type, dep_insn_type;
12082 enum attr_memory memory, dep_memory;
12084 int dep_insn_code_number;
12086 /* Anti and output dependencies have zero cost on all CPUs. */
12087 if (REG_NOTE_KIND (link) != 0)
12090 dep_insn_code_number = recog_memoized (dep_insn);
12092 /* If we can't recognize the insns, we can't really do anything. */
12093 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12096 insn_type = get_attr_type (insn);
12097 dep_insn_type = get_attr_type (dep_insn);
12101 case PROCESSOR_PENTIUM:
12102 /* Address Generation Interlock adds a cycle of latency. */
12103 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12106 /* ??? Compares pair with jump/setcc. */
12107 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12110 /* Floating point stores require value to be ready one cycle earlier. */
12111 if (insn_type == TYPE_FMOV
12112 && get_attr_memory (insn) == MEMORY_STORE
12113 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12117 case PROCESSOR_PENTIUMPRO:
12118 memory = get_attr_memory (insn);
12119 dep_memory = get_attr_memory (dep_insn);
12121 /* Since we can't represent delayed latencies of load+operation,
12122 increase the cost here for non-imov insns. */
12123 if (dep_insn_type != TYPE_IMOV
12124 && dep_insn_type != TYPE_FMOV
12125 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12128 /* INT->FP conversion is expensive. */
12129 if (get_attr_fp_int_src (dep_insn))
12132 /* There is one cycle extra latency between an FP op and a store. */
12133 if (insn_type == TYPE_FMOV
12134 && (set = single_set (dep_insn)) != NULL_RTX
12135 && (set2 = single_set (insn)) != NULL_RTX
12136 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12137 && GET_CODE (SET_DEST (set2)) == MEM)
12140 /* Show ability of reorder buffer to hide latency of load by executing
12141 in parallel with previous instruction in case
12142 previous instruction is not needed to compute the address. */
12143 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12144 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12146 /* Claim moves to take one cycle, as core can issue one load
12147 at time and the next load can start cycle later. */
12148 if (dep_insn_type == TYPE_IMOV
12149 || dep_insn_type == TYPE_FMOV)
12157 memory = get_attr_memory (insn);
12158 dep_memory = get_attr_memory (dep_insn);
12159 /* The esp dependency is resolved before the instruction is really
12161 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12162 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12165 /* Since we can't represent delayed latencies of load+operation,
12166 increase the cost here for non-imov insns. */
12167 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12168 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12170 /* INT->FP conversion is expensive. */
12171 if (get_attr_fp_int_src (dep_insn))
12174 /* Show ability of reorder buffer to hide latency of load by executing
12175 in parallel with previous instruction in case
12176 previous instruction is not needed to compute the address. */
12177 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12178 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12180 /* Claim moves to take one cycle, as core can issue one load
12181 at time and the next load can start cycle later. */
12182 if (dep_insn_type == TYPE_IMOV
12183 || dep_insn_type == TYPE_FMOV)
12192 case PROCESSOR_ATHLON:
12194 memory = get_attr_memory (insn);
12195 dep_memory = get_attr_memory (dep_insn);
12197 /* Show ability of reorder buffer to hide latency of load by executing
12198 in parallel with previous instruction in case
12199 previous instruction is not needed to compute the address. */
12200 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12201 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12203 enum attr_unit unit = get_attr_unit (insn);
12206 /* Because of the difference between the length of integer and
12207 floating unit pipeline preparation stages, the memory operands
12208 for floating point are cheaper.
12210 ??? For Athlon it the difference is most propbably 2. */
12211 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12214 loadcost = TARGET_ATHLON ? 2 : 0;
12216 if (cost >= loadcost)
12231 struct ppro_sched_data
12234 int issued_this_cycle;
12238 static enum attr_ppro_uops
12239 ix86_safe_ppro_uops (insn)
12242 if (recog_memoized (insn) >= 0)
12243 return get_attr_ppro_uops (insn);
12245 return PPRO_UOPS_MANY;
12249 ix86_dump_ppro_packet (dump)
12252 if (ix86_sched_data.ppro.decode[0])
12254 fprintf (dump, "PPRO packet: %d",
12255 INSN_UID (ix86_sched_data.ppro.decode[0]));
12256 if (ix86_sched_data.ppro.decode[1])
12257 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12258 if (ix86_sched_data.ppro.decode[2])
12259 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12260 fputc ('\n', dump);
12264 /* We're beginning a new block. Initialize data structures as necessary. */
12267 ix86_sched_init (dump, sched_verbose, veclen)
12268 FILE *dump ATTRIBUTE_UNUSED;
12269 int sched_verbose ATTRIBUTE_UNUSED;
12270 int veclen ATTRIBUTE_UNUSED;
12272 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12275 /* Shift INSN to SLOT, and shift everything else down. */
12278 ix86_reorder_insn (insnp, slot)
12285 insnp[0] = insnp[1];
12286 while (++insnp != slot);
12292 ix86_sched_reorder_ppro (ready, e_ready)
12297 enum attr_ppro_uops cur_uops;
12298 int issued_this_cycle;
12302 /* At this point .ppro.decode contains the state of the three
12303 decoders from last "cycle". That is, those insns that were
12304 actually independent. But here we're scheduling for the
12305 decoder, and we may find things that are decodable in the
12308 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12309 issued_this_cycle = 0;
12312 cur_uops = ix86_safe_ppro_uops (*insnp);
12314 /* If the decoders are empty, and we've a complex insn at the
12315 head of the priority queue, let it issue without complaint. */
12316 if (decode[0] == NULL)
12318 if (cur_uops == PPRO_UOPS_MANY)
12320 decode[0] = *insnp;
12324 /* Otherwise, search for a 2-4 uop unsn to issue. */
12325 while (cur_uops != PPRO_UOPS_FEW)
12327 if (insnp == ready)
12329 cur_uops = ix86_safe_ppro_uops (*--insnp);
12332 /* If so, move it to the head of the line. */
12333 if (cur_uops == PPRO_UOPS_FEW)
12334 ix86_reorder_insn (insnp, e_ready);
12336 /* Issue the head of the queue. */
12337 issued_this_cycle = 1;
12338 decode[0] = *e_ready--;
12341 /* Look for simple insns to fill in the other two slots. */
12342 for (i = 1; i < 3; ++i)
12343 if (decode[i] == NULL)
12345 if (ready > e_ready)
12349 cur_uops = ix86_safe_ppro_uops (*insnp);
12350 while (cur_uops != PPRO_UOPS_ONE)
12352 if (insnp == ready)
12354 cur_uops = ix86_safe_ppro_uops (*--insnp);
12357 /* Found one. Move it to the head of the queue and issue it. */
12358 if (cur_uops == PPRO_UOPS_ONE)
12360 ix86_reorder_insn (insnp, e_ready);
12361 decode[i] = *e_ready--;
12362 issued_this_cycle++;
12366 /* ??? Didn't find one. Ideally, here we would do a lazy split
12367 of 2-uop insns, issue one and queue the other. */
12371 if (issued_this_cycle == 0)
12372 issued_this_cycle = 1;
12373 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12376 /* We are about to being issuing insns for this clock cycle.
12377 Override the default sort algorithm to better slot instructions. */
12379 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12380 FILE *dump ATTRIBUTE_UNUSED;
12381 int sched_verbose ATTRIBUTE_UNUSED;
12384 int clock_var ATTRIBUTE_UNUSED;
12386 int n_ready = *n_readyp;
12387 rtx *e_ready = ready + n_ready - 1;
12389 /* Make sure to go ahead and initialize key items in
12390 ix86_sched_data if we are not going to bother trying to
12391 reorder the ready queue. */
12394 ix86_sched_data.ppro.issued_this_cycle = 1;
12403 case PROCESSOR_PENTIUMPRO:
12404 ix86_sched_reorder_ppro (ready, e_ready);
12409 return ix86_issue_rate ();
12412 /* We are about to issue INSN. Return the number of insns left on the
12413 ready queue that can be issued this cycle. */
12416 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12420 int can_issue_more;
12426 return can_issue_more - 1;
12428 case PROCESSOR_PENTIUMPRO:
12430 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12432 if (uops == PPRO_UOPS_MANY)
12435 ix86_dump_ppro_packet (dump);
12436 ix86_sched_data.ppro.decode[0] = insn;
12437 ix86_sched_data.ppro.decode[1] = NULL;
12438 ix86_sched_data.ppro.decode[2] = NULL;
12440 ix86_dump_ppro_packet (dump);
12441 ix86_sched_data.ppro.decode[0] = NULL;
12443 else if (uops == PPRO_UOPS_FEW)
12446 ix86_dump_ppro_packet (dump);
12447 ix86_sched_data.ppro.decode[0] = insn;
12448 ix86_sched_data.ppro.decode[1] = NULL;
12449 ix86_sched_data.ppro.decode[2] = NULL;
12453 for (i = 0; i < 3; ++i)
12454 if (ix86_sched_data.ppro.decode[i] == NULL)
12456 ix86_sched_data.ppro.decode[i] = insn;
12464 ix86_dump_ppro_packet (dump);
12465 ix86_sched_data.ppro.decode[0] = NULL;
12466 ix86_sched_data.ppro.decode[1] = NULL;
12467 ix86_sched_data.ppro.decode[2] = NULL;
12471 return --ix86_sched_data.ppro.issued_this_cycle;
12476 ia32_use_dfa_pipeline_interface ()
12478 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12483 /* How many alternative schedules to try. This should be as wide as the
12484 scheduling freedom in the DFA, but no wider. Making this value too
12485 large results extra work for the scheduler. */
12488 ia32_multipass_dfa_lookahead ()
12490 if (ix86_tune == PROCESSOR_PENTIUM)
12497 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12498 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12502 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12504 rtx dstref, srcref, dstreg, srcreg;
12508 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12510 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12514 /* Subroutine of above to actually do the updating by recursively walking
12518 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12520 rtx dstref, srcref, dstreg, srcreg;
12522 enum rtx_code code = GET_CODE (x);
12523 const char *format_ptr = GET_RTX_FORMAT (code);
12526 if (code == MEM && XEXP (x, 0) == dstreg)
12527 MEM_COPY_ATTRIBUTES (x, dstref);
12528 else if (code == MEM && XEXP (x, 0) == srcreg)
12529 MEM_COPY_ATTRIBUTES (x, srcref);
12531 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12533 if (*format_ptr == 'e')
12534 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12536 else if (*format_ptr == 'E')
12537 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12538 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12543 /* Compute the alignment given to a constant that is being placed in memory.
12544 EXP is the constant and ALIGN is the alignment that the object would
12546 The value of this function is used instead of that alignment to align
12550 ix86_constant_alignment (exp, align)
12554 if (TREE_CODE (exp) == REAL_CST)
12556 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12558 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12561 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12568 /* Compute the alignment for a static variable.
12569 TYPE is the data type, and ALIGN is the alignment that
12570 the object would ordinarily have. The value of this function is used
12571 instead of that alignment to align the object. */
12574 ix86_data_alignment (type, align)
12578 if (AGGREGATE_TYPE_P (type)
12579 && TYPE_SIZE (type)
12580 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12581 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12582 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12585 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12586 to 16byte boundary. */
12589 if (AGGREGATE_TYPE_P (type)
12590 && TYPE_SIZE (type)
12591 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12592 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12593 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12597 if (TREE_CODE (type) == ARRAY_TYPE)
12599 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12601 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12604 else if (TREE_CODE (type) == COMPLEX_TYPE)
12607 if (TYPE_MODE (type) == DCmode && align < 64)
12609 if (TYPE_MODE (type) == XCmode && align < 128)
12612 else if ((TREE_CODE (type) == RECORD_TYPE
12613 || TREE_CODE (type) == UNION_TYPE
12614 || TREE_CODE (type) == QUAL_UNION_TYPE)
12615 && TYPE_FIELDS (type))
12617 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12619 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12622 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12623 || TREE_CODE (type) == INTEGER_TYPE)
12625 if (TYPE_MODE (type) == DFmode && align < 64)
12627 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12634 /* Compute the alignment for a local variable.
12635 TYPE is the data type, and ALIGN is the alignment that
12636 the object would ordinarily have. The value of this macro is used
12637 instead of that alignment to align the object. */
12640 ix86_local_alignment (type, align)
12644 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12645 to 16byte boundary. */
12648 if (AGGREGATE_TYPE_P (type)
12649 && TYPE_SIZE (type)
12650 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12651 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12652 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12655 if (TREE_CODE (type) == ARRAY_TYPE)
12657 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12659 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12662 else if (TREE_CODE (type) == COMPLEX_TYPE)
12664 if (TYPE_MODE (type) == DCmode && align < 64)
12666 if (TYPE_MODE (type) == XCmode && align < 128)
12669 else if ((TREE_CODE (type) == RECORD_TYPE
12670 || TREE_CODE (type) == UNION_TYPE
12671 || TREE_CODE (type) == QUAL_UNION_TYPE)
12672 && TYPE_FIELDS (type))
12674 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12676 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12679 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12680 || TREE_CODE (type) == INTEGER_TYPE)
12683 if (TYPE_MODE (type) == DFmode && align < 64)
12685 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12691 /* Emit RTL insns to initialize the variable parts of a trampoline.
12692 FNADDR is an RTX for the address of the function's pure code.
12693 CXT is an RTX for the static chain value for the function. */
12695 x86_initialize_trampoline (tramp, fnaddr, cxt)
12696 rtx tramp, fnaddr, cxt;
12700 /* Compute offset from the end of the jmp to the target function. */
12701 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12702 plus_constant (tramp, 10),
12703 NULL_RTX, 1, OPTAB_DIRECT);
12704 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12705 gen_int_mode (0xb9, QImode));
12706 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12707 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12708 gen_int_mode (0xe9, QImode));
12709 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12714 /* Try to load address using shorter movl instead of movabs.
12715 We may want to support movq for kernel mode, but kernel does not use
12716 trampolines at the moment. */
12717 if (x86_64_zero_extended_value (fnaddr))
12719 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12720 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12721 gen_int_mode (0xbb41, HImode));
12722 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12723 gen_lowpart (SImode, fnaddr));
12728 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12729 gen_int_mode (0xbb49, HImode));
12730 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12734 /* Load static chain using movabs to r10. */
12735 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12736 gen_int_mode (0xba49, HImode));
12737 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12740 /* Jump to the r11 */
12741 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12742 gen_int_mode (0xff49, HImode));
12743 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12744 gen_int_mode (0xe3, QImode));
12746 if (offset > TRAMPOLINE_SIZE)
12750 #ifdef TRANSFER_FROM_TRAMPOLINE
12751 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12752 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12756 #define def_builtin(MASK, NAME, TYPE, CODE) \
12758 if ((MASK) & target_flags \
12759 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12760 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12761 NULL, NULL_TREE); \
12764 struct builtin_description
12766 const unsigned int mask;
12767 const enum insn_code icode;
12768 const char *const name;
12769 const enum ix86_builtins code;
12770 const enum rtx_code comparison;
12771 const unsigned int flag;
12774 /* Used for builtins that are enabled both by -msse and -msse2. */
12775 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12776 #define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12777 #define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
12779 static const struct builtin_description bdesc_comi[] =
12781 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12782 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12783 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12784 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12785 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12786 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12787 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12788 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12789 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12790 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12791 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12792 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12793 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12794 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12795 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12796 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12797 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12798 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12807 static const struct builtin_description bdesc_2arg[] =
12810 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12811 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12812 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12813 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12814 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12815 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12816 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12817 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12819 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12820 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12821 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12822 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12823 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12824 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12825 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12826 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12827 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12828 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12829 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12830 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12831 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12832 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12833 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12834 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12835 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12836 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12837 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12838 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12840 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12841 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12842 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12843 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12845 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12846 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12847 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12848 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12850 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12851 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12852 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12853 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12854 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12857 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12858 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12859 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12860 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12861 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12862 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12863 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12864 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12866 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12867 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12868 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12869 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12870 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12871 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12872 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12873 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12875 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12876 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12877 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12879 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12880 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12881 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12882 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12884 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12885 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12887 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12888 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12889 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12890 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12891 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12892 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12894 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12895 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12896 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12897 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12899 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12900 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12901 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12902 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12903 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12904 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12908 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12909 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12911 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12912 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12913 { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12915 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12916 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12917 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12918 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12919 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12920 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12922 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12923 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12924 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12925 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12926 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12927 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12929 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12930 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12931 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12932 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12934 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12935 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12948 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12949 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12950 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12951 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12952 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12953 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12954 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12955 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12956 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12957 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12958 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12959 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12960 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12961 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12962 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12963 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12964 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12965 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12966 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12968 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12992 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12993 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12994 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12995 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12996 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12997 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12998 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12999 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13064 { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
13069 static const struct builtin_description bdesc_1arg[] =
13071 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13072 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13074 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13075 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13076 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13078 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13079 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13080 { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13081 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13082 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13083 { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13085 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13087 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13090 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13092 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13093 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13097 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13098 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13099 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13104 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13105 { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13106 { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
13116 ix86_init_builtins ()
13119 ix86_init_mmx_sse_builtins ();
13122 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13123 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13126 ix86_init_mmx_sse_builtins ()
13128 const struct builtin_description * d;
13131 tree pchar_type_node = build_pointer_type (char_type_node);
13132 tree pcchar_type_node = build_pointer_type (
13133 build_type_variant (char_type_node, 1, 0));
13134 tree pfloat_type_node = build_pointer_type (float_type_node);
13135 tree pcfloat_type_node = build_pointer_type (
13136 build_type_variant (float_type_node, 1, 0));
13137 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13138 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13139 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13142 tree int_ftype_v4sf_v4sf
13143 = build_function_type_list (integer_type_node,
13144 V4SF_type_node, V4SF_type_node, NULL_TREE);
13145 tree v4si_ftype_v4sf_v4sf
13146 = build_function_type_list (V4SI_type_node,
13147 V4SF_type_node, V4SF_type_node, NULL_TREE);
13148 /* MMX/SSE/integer conversions. */
13149 tree int_ftype_v4sf
13150 = build_function_type_list (integer_type_node,
13151 V4SF_type_node, NULL_TREE);
13152 tree int64_ftype_v4sf
13153 = build_function_type_list (long_long_integer_type_node,
13154 V4SF_type_node, NULL_TREE);
13155 tree int_ftype_v8qi
13156 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13157 tree v4sf_ftype_v4sf_int
13158 = build_function_type_list (V4SF_type_node,
13159 V4SF_type_node, integer_type_node, NULL_TREE);
13160 tree v4sf_ftype_v4sf_int64
13161 = build_function_type_list (V4SF_type_node,
13162 V4SF_type_node, long_long_integer_type_node,
13164 tree v4sf_ftype_v4sf_v2si
13165 = build_function_type_list (V4SF_type_node,
13166 V4SF_type_node, V2SI_type_node, NULL_TREE);
13167 tree int_ftype_v4hi_int
13168 = build_function_type_list (integer_type_node,
13169 V4HI_type_node, integer_type_node, NULL_TREE);
13170 tree v4hi_ftype_v4hi_int_int
13171 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13172 integer_type_node, integer_type_node,
13174 /* Miscellaneous. */
13175 tree v8qi_ftype_v4hi_v4hi
13176 = build_function_type_list (V8QI_type_node,
13177 V4HI_type_node, V4HI_type_node, NULL_TREE);
13178 tree v4hi_ftype_v2si_v2si
13179 = build_function_type_list (V4HI_type_node,
13180 V2SI_type_node, V2SI_type_node, NULL_TREE);
13181 tree v4sf_ftype_v4sf_v4sf_int
13182 = build_function_type_list (V4SF_type_node,
13183 V4SF_type_node, V4SF_type_node,
13184 integer_type_node, NULL_TREE);
13185 tree v2si_ftype_v4hi_v4hi
13186 = build_function_type_list (V2SI_type_node,
13187 V4HI_type_node, V4HI_type_node, NULL_TREE);
13188 tree v4hi_ftype_v4hi_int
13189 = build_function_type_list (V4HI_type_node,
13190 V4HI_type_node, integer_type_node, NULL_TREE);
13191 tree v4hi_ftype_v4hi_di
13192 = build_function_type_list (V4HI_type_node,
13193 V4HI_type_node, long_long_unsigned_type_node,
13195 tree v2si_ftype_v2si_di
13196 = build_function_type_list (V2SI_type_node,
13197 V2SI_type_node, long_long_unsigned_type_node,
13199 tree void_ftype_void
13200 = build_function_type (void_type_node, void_list_node);
13201 tree void_ftype_unsigned
13202 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13203 tree unsigned_ftype_void
13204 = build_function_type (unsigned_type_node, void_list_node);
13206 = build_function_type (long_long_unsigned_type_node, void_list_node);
13207 tree v4sf_ftype_void
13208 = build_function_type (V4SF_type_node, void_list_node);
13209 tree v2si_ftype_v4sf
13210 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13211 /* Loads/stores. */
13212 tree void_ftype_v8qi_v8qi_pchar
13213 = build_function_type_list (void_type_node,
13214 V8QI_type_node, V8QI_type_node,
13215 pchar_type_node, NULL_TREE);
13216 tree v4sf_ftype_pcfloat
13217 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13218 /* @@@ the type is bogus */
13219 tree v4sf_ftype_v4sf_pv2si
13220 = build_function_type_list (V4SF_type_node,
13221 V4SF_type_node, pv2si_type_node, NULL_TREE);
13222 tree void_ftype_pv2si_v4sf
13223 = build_function_type_list (void_type_node,
13224 pv2si_type_node, V4SF_type_node, NULL_TREE);
13225 tree void_ftype_pfloat_v4sf
13226 = build_function_type_list (void_type_node,
13227 pfloat_type_node, V4SF_type_node, NULL_TREE);
13228 tree void_ftype_pdi_di
13229 = build_function_type_list (void_type_node,
13230 pdi_type_node, long_long_unsigned_type_node,
13232 tree void_ftype_pv2di_v2di
13233 = build_function_type_list (void_type_node,
13234 pv2di_type_node, V2DI_type_node, NULL_TREE);
13235 /* Normal vector unops. */
13236 tree v4sf_ftype_v4sf
13237 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13239 /* Normal vector binops. */
13240 tree v4sf_ftype_v4sf_v4sf
13241 = build_function_type_list (V4SF_type_node,
13242 V4SF_type_node, V4SF_type_node, NULL_TREE);
13243 tree v8qi_ftype_v8qi_v8qi
13244 = build_function_type_list (V8QI_type_node,
13245 V8QI_type_node, V8QI_type_node, NULL_TREE);
13246 tree v4hi_ftype_v4hi_v4hi
13247 = build_function_type_list (V4HI_type_node,
13248 V4HI_type_node, V4HI_type_node, NULL_TREE);
13249 tree v2si_ftype_v2si_v2si
13250 = build_function_type_list (V2SI_type_node,
13251 V2SI_type_node, V2SI_type_node, NULL_TREE);
13252 tree di_ftype_di_di
13253 = build_function_type_list (long_long_unsigned_type_node,
13254 long_long_unsigned_type_node,
13255 long_long_unsigned_type_node, NULL_TREE);
13257 tree v2si_ftype_v2sf
13258 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13259 tree v2sf_ftype_v2si
13260 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13261 tree v2si_ftype_v2si
13262 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13263 tree v2sf_ftype_v2sf
13264 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13265 tree v2sf_ftype_v2sf_v2sf
13266 = build_function_type_list (V2SF_type_node,
13267 V2SF_type_node, V2SF_type_node, NULL_TREE);
13268 tree v2si_ftype_v2sf_v2sf
13269 = build_function_type_list (V2SI_type_node,
13270 V2SF_type_node, V2SF_type_node, NULL_TREE);
13271 tree pint_type_node = build_pointer_type (integer_type_node);
13272 tree pcint_type_node = build_pointer_type (
13273 build_type_variant (integer_type_node, 1, 0));
13274 tree pdouble_type_node = build_pointer_type (double_type_node);
13275 tree pcdouble_type_node = build_pointer_type (
13276 build_type_variant (double_type_node, 1, 0));
13277 tree int_ftype_v2df_v2df
13278 = build_function_type_list (integer_type_node,
13279 V2DF_type_node, V2DF_type_node, NULL_TREE);
13282 = build_function_type (intTI_type_node, void_list_node);
13283 tree v2di_ftype_void
13284 = build_function_type (V2DI_type_node, void_list_node);
13285 tree ti_ftype_ti_ti
13286 = build_function_type_list (intTI_type_node,
13287 intTI_type_node, intTI_type_node, NULL_TREE);
13288 tree void_ftype_pcvoid
13289 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13291 = build_function_type_list (V2DI_type_node,
13292 long_long_unsigned_type_node, NULL_TREE);
13294 = build_function_type_list (long_long_unsigned_type_node,
13295 V2DI_type_node, NULL_TREE);
13296 tree v4sf_ftype_v4si
13297 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13298 tree v4si_ftype_v4sf
13299 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13300 tree v2df_ftype_v4si
13301 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13302 tree v4si_ftype_v2df
13303 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13304 tree v2si_ftype_v2df
13305 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13306 tree v4sf_ftype_v2df
13307 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13308 tree v2df_ftype_v2si
13309 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13310 tree v2df_ftype_v4sf
13311 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13312 tree int_ftype_v2df
13313 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13314 tree int64_ftype_v2df
13315 = build_function_type_list (long_long_integer_type_node,
13316 V2DF_type_node, NULL_TREE);
13317 tree v2df_ftype_v2df_int
13318 = build_function_type_list (V2DF_type_node,
13319 V2DF_type_node, integer_type_node, NULL_TREE);
13320 tree v2df_ftype_v2df_int64
13321 = build_function_type_list (V2DF_type_node,
13322 V2DF_type_node, long_long_integer_type_node,
13324 tree v4sf_ftype_v4sf_v2df
13325 = build_function_type_list (V4SF_type_node,
13326 V4SF_type_node, V2DF_type_node, NULL_TREE);
13327 tree v2df_ftype_v2df_v4sf
13328 = build_function_type_list (V2DF_type_node,
13329 V2DF_type_node, V4SF_type_node, NULL_TREE);
13330 tree v2df_ftype_v2df_v2df_int
13331 = build_function_type_list (V2DF_type_node,
13332 V2DF_type_node, V2DF_type_node,
13335 tree v2df_ftype_v2df_pv2si
13336 = build_function_type_list (V2DF_type_node,
13337 V2DF_type_node, pv2si_type_node, NULL_TREE);
13338 tree void_ftype_pv2si_v2df
13339 = build_function_type_list (void_type_node,
13340 pv2si_type_node, V2DF_type_node, NULL_TREE);
13341 tree void_ftype_pdouble_v2df
13342 = build_function_type_list (void_type_node,
13343 pdouble_type_node, V2DF_type_node, NULL_TREE);
13344 tree void_ftype_pint_int
13345 = build_function_type_list (void_type_node,
13346 pint_type_node, integer_type_node, NULL_TREE);
13347 tree void_ftype_v16qi_v16qi_pchar
13348 = build_function_type_list (void_type_node,
13349 V16QI_type_node, V16QI_type_node,
13350 pchar_type_node, NULL_TREE);
13351 tree v2df_ftype_pcdouble
13352 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13353 tree v2df_ftype_v2df_v2df
13354 = build_function_type_list (V2DF_type_node,
13355 V2DF_type_node, V2DF_type_node, NULL_TREE);
13356 tree v16qi_ftype_v16qi_v16qi
13357 = build_function_type_list (V16QI_type_node,
13358 V16QI_type_node, V16QI_type_node, NULL_TREE);
13359 tree v8hi_ftype_v8hi_v8hi
13360 = build_function_type_list (V8HI_type_node,
13361 V8HI_type_node, V8HI_type_node, NULL_TREE);
13362 tree v4si_ftype_v4si_v4si
13363 = build_function_type_list (V4SI_type_node,
13364 V4SI_type_node, V4SI_type_node, NULL_TREE);
13365 tree v2di_ftype_v2di_v2di
13366 = build_function_type_list (V2DI_type_node,
13367 V2DI_type_node, V2DI_type_node, NULL_TREE);
13368 tree v2di_ftype_v2df_v2df
13369 = build_function_type_list (V2DI_type_node,
13370 V2DF_type_node, V2DF_type_node, NULL_TREE);
13371 tree v2df_ftype_v2df
13372 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13373 tree v2df_ftype_double
13374 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13375 tree v2df_ftype_double_double
13376 = build_function_type_list (V2DF_type_node,
13377 double_type_node, double_type_node, NULL_TREE);
13378 tree int_ftype_v8hi_int
13379 = build_function_type_list (integer_type_node,
13380 V8HI_type_node, integer_type_node, NULL_TREE);
13381 tree v8hi_ftype_v8hi_int_int
13382 = build_function_type_list (V8HI_type_node,
13383 V8HI_type_node, integer_type_node,
13384 integer_type_node, NULL_TREE);
13385 tree v2di_ftype_v2di_int
13386 = build_function_type_list (V2DI_type_node,
13387 V2DI_type_node, integer_type_node, NULL_TREE);
13388 tree v4si_ftype_v4si_int
13389 = build_function_type_list (V4SI_type_node,
13390 V4SI_type_node, integer_type_node, NULL_TREE);
13391 tree v8hi_ftype_v8hi_int
13392 = build_function_type_list (V8HI_type_node,
13393 V8HI_type_node, integer_type_node, NULL_TREE);
13394 tree v8hi_ftype_v8hi_v2di
13395 = build_function_type_list (V8HI_type_node,
13396 V8HI_type_node, V2DI_type_node, NULL_TREE);
13397 tree v4si_ftype_v4si_v2di
13398 = build_function_type_list (V4SI_type_node,
13399 V4SI_type_node, V2DI_type_node, NULL_TREE);
13400 tree v4si_ftype_v8hi_v8hi
13401 = build_function_type_list (V4SI_type_node,
13402 V8HI_type_node, V8HI_type_node, NULL_TREE);
13403 tree di_ftype_v8qi_v8qi
13404 = build_function_type_list (long_long_unsigned_type_node,
13405 V8QI_type_node, V8QI_type_node, NULL_TREE);
13406 tree v2di_ftype_v16qi_v16qi
13407 = build_function_type_list (V2DI_type_node,
13408 V16QI_type_node, V16QI_type_node, NULL_TREE);
13409 tree int_ftype_v16qi
13410 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13411 tree v16qi_ftype_pcchar
13412 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13413 tree void_ftype_pchar_v16qi
13414 = build_function_type_list (void_type_node,
13415 pchar_type_node, V16QI_type_node, NULL_TREE);
13416 tree v4si_ftype_pcint
13417 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13418 tree void_ftype_pcint_v4si
13419 = build_function_type_list (void_type_node,
13420 pcint_type_node, V4SI_type_node, NULL_TREE);
13421 tree v2di_ftype_v2di
13422 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13424 /* Add all builtins that are more or less simple operations on two
13426 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13428 /* Use one of the operands; the target can have a different mode for
13429 mask-generating compares. */
13430 enum machine_mode mode;
13435 mode = insn_data[d->icode].operand[1].mode;
13440 type = v16qi_ftype_v16qi_v16qi;
13443 type = v8hi_ftype_v8hi_v8hi;
13446 type = v4si_ftype_v4si_v4si;
13449 type = v2di_ftype_v2di_v2di;
13452 type = v2df_ftype_v2df_v2df;
13455 type = ti_ftype_ti_ti;
13458 type = v4sf_ftype_v4sf_v4sf;
13461 type = v8qi_ftype_v8qi_v8qi;
13464 type = v4hi_ftype_v4hi_v4hi;
13467 type = v2si_ftype_v2si_v2si;
13470 type = di_ftype_di_di;
13477 /* Override for comparisons. */
13478 if (d->icode == CODE_FOR_maskcmpv4sf3
13479 || d->icode == CODE_FOR_maskncmpv4sf3
13480 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13481 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13482 type = v4si_ftype_v4sf_v4sf;
13484 if (d->icode == CODE_FOR_maskcmpv2df3
13485 || d->icode == CODE_FOR_maskncmpv2df3
13486 || d->icode == CODE_FOR_vmmaskcmpv2df3
13487 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13488 type = v2di_ftype_v2df_v2df;
13490 def_builtin (d->mask, d->name, type, d->code);
13493 /* Add the remaining MMX insns with somewhat more complicated types. */
13494 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13495 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13496 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13497 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13498 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13500 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13501 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13502 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13504 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13505 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13507 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13508 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13510 /* comi/ucomi insns. */
13511 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13512 if (d->mask == MASK_SSE2)
13513 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13515 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13517 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13518 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13519 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13521 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13522 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13523 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13524 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13525 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13526 def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13527 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13528 def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13529 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13530 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13531 def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13533 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13534 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13536 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13538 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13539 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13540 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13541 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13542 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13543 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13545 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13546 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13547 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13548 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13550 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13551 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13552 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13553 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13555 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13557 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13559 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13560 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13561 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13562 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13563 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13564 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13566 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13568 /* Original 3DNow! */
13569 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13570 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13571 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13572 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13573 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13574 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13575 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13576 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13577 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13578 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13579 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13580 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13581 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13582 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13583 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13584 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13585 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13586 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13587 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13588 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13590 /* 3DNow! extension as used in the Athlon CPU. */
13591 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13592 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13593 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13594 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13595 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13596 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13598 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13612 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13615 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13622 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13642 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13645 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13647 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13648 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13649 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13650 def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13657 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13679 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13681 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13697 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13704 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13705 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13710 /* Errors in the source file can cause expand_expr to return const0_rtx
13711 where we expect a vector. To avoid crashing, use one of the vector
13712 clear instructions. */
13714 safe_vector_operand (x, mode)
13716 enum machine_mode mode;
13718 if (x != const0_rtx)
13720 x = gen_reg_rtx (mode);
13722 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13723 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13724 : gen_rtx_SUBREG (DImode, x, 0)));
13726 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13727 : gen_rtx_SUBREG (V4SFmode, x, 0),
13728 CONST0_RTX (V4SFmode)));
13732 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13735 ix86_expand_binop_builtin (icode, arglist, target)
13736 enum insn_code icode;
13741 tree arg0 = TREE_VALUE (arglist);
13742 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13743 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13744 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13745 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13746 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13747 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13749 if (VECTOR_MODE_P (mode0))
13750 op0 = safe_vector_operand (op0, mode0);
13751 if (VECTOR_MODE_P (mode1))
13752 op1 = safe_vector_operand (op1, mode1);
13755 || GET_MODE (target) != tmode
13756 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13757 target = gen_reg_rtx (tmode);
13759 if (GET_MODE (op1) == SImode && mode1 == TImode)
13761 rtx x = gen_reg_rtx (V4SImode);
13762 emit_insn (gen_sse2_loadd (x, op1));
13763 op1 = gen_lowpart (TImode, x);
13766 /* In case the insn wants input operands in modes different from
13767 the result, abort. */
13768 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13771 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13772 op0 = copy_to_mode_reg (mode0, op0);
13773 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13774 op1 = copy_to_mode_reg (mode1, op1);
13776 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13777 yet one of the two must not be a memory. This is normally enforced
13778 by expanders, but we didn't bother to create one here. */
13779 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13780 op0 = copy_to_mode_reg (mode0, op0);
13782 pat = GEN_FCN (icode) (target, op0, op1);
13789 /* Subroutine of ix86_expand_builtin to take care of stores. */
13792 ix86_expand_store_builtin (icode, arglist)
13793 enum insn_code icode;
13797 tree arg0 = TREE_VALUE (arglist);
13798 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13799 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13800 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13801 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13802 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13804 if (VECTOR_MODE_P (mode1))
13805 op1 = safe_vector_operand (op1, mode1);
13807 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13808 op1 = copy_to_mode_reg (mode1, op1);
13810 pat = GEN_FCN (icode) (op0, op1);
13816 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13819 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13820 enum insn_code icode;
13826 tree arg0 = TREE_VALUE (arglist);
13827 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13828 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13829 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13832 || GET_MODE (target) != tmode
13833 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13834 target = gen_reg_rtx (tmode);
13836 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13839 if (VECTOR_MODE_P (mode0))
13840 op0 = safe_vector_operand (op0, mode0);
13842 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13843 op0 = copy_to_mode_reg (mode0, op0);
13846 pat = GEN_FCN (icode) (target, op0);
13853 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13854 sqrtss, rsqrtss, rcpss. */
13857 ix86_expand_unop1_builtin (icode, arglist, target)
13858 enum insn_code icode;
13863 tree arg0 = TREE_VALUE (arglist);
13864 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13865 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13866 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13869 || GET_MODE (target) != tmode
13870 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13871 target = gen_reg_rtx (tmode);
13873 if (VECTOR_MODE_P (mode0))
13874 op0 = safe_vector_operand (op0, mode0);
13876 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13877 op0 = copy_to_mode_reg (mode0, op0);
13880 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13881 op1 = copy_to_mode_reg (mode0, op1);
13883 pat = GEN_FCN (icode) (target, op0, op1);
13890 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13893 ix86_expand_sse_compare (d, arglist, target)
13894 const struct builtin_description *d;
13899 tree arg0 = TREE_VALUE (arglist);
13900 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13901 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13902 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13904 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13905 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13906 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13907 enum rtx_code comparison = d->comparison;
13909 if (VECTOR_MODE_P (mode0))
13910 op0 = safe_vector_operand (op0, mode0);
13911 if (VECTOR_MODE_P (mode1))
13912 op1 = safe_vector_operand (op1, mode1);
13914 /* Swap operands if we have a comparison that isn't available in
13918 rtx tmp = gen_reg_rtx (mode1);
13919 emit_move_insn (tmp, op1);
13925 || GET_MODE (target) != tmode
13926 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13927 target = gen_reg_rtx (tmode);
13929 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13930 op0 = copy_to_mode_reg (mode0, op0);
13931 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13932 op1 = copy_to_mode_reg (mode1, op1);
13934 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13935 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13942 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13945 ix86_expand_sse_comi (d, arglist, target)
13946 const struct builtin_description *d;
13951 tree arg0 = TREE_VALUE (arglist);
13952 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13953 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13954 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13956 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13957 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13958 enum rtx_code comparison = d->comparison;
13960 if (VECTOR_MODE_P (mode0))
13961 op0 = safe_vector_operand (op0, mode0);
13962 if (VECTOR_MODE_P (mode1))
13963 op1 = safe_vector_operand (op1, mode1);
13965 /* Swap operands if we have a comparison that isn't available in
13974 target = gen_reg_rtx (SImode);
13975 emit_move_insn (target, const0_rtx);
13976 target = gen_rtx_SUBREG (QImode, target, 0);
13978 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13979 op0 = copy_to_mode_reg (mode0, op0);
13980 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13981 op1 = copy_to_mode_reg (mode1, op1);
13983 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13984 pat = GEN_FCN (d->icode) (op0, op1);
13988 emit_insn (gen_rtx_SET (VOIDmode,
13989 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13990 gen_rtx_fmt_ee (comparison, QImode,
13994 return SUBREG_REG (target);
13997 /* Expand an expression EXP that calls a built-in function,
13998 with result going to TARGET if that's convenient
13999 (and in mode MODE if that's convenient).
14000 SUBTARGET may be used as the target for computing one of EXP's operands.
14001 IGNORE is nonzero if the value is to be ignored. */
14004 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
14007 rtx subtarget ATTRIBUTE_UNUSED;
14008 enum machine_mode mode ATTRIBUTE_UNUSED;
14009 int ignore ATTRIBUTE_UNUSED;
14011 const struct builtin_description *d;
14013 enum insn_code icode;
14014 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14015 tree arglist = TREE_OPERAND (exp, 1);
14016 tree arg0, arg1, arg2;
14017 rtx op0, op1, op2, pat;
14018 enum machine_mode tmode, mode0, mode1, mode2;
14019 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14023 case IX86_BUILTIN_EMMS:
14024 emit_insn (gen_emms ());
14027 case IX86_BUILTIN_SFENCE:
14028 emit_insn (gen_sfence ());
14031 case IX86_BUILTIN_PEXTRW:
14032 case IX86_BUILTIN_PEXTRW128:
14033 icode = (fcode == IX86_BUILTIN_PEXTRW
14034 ? CODE_FOR_mmx_pextrw
14035 : CODE_FOR_sse2_pextrw);
14036 arg0 = TREE_VALUE (arglist);
14037 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14038 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14039 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14040 tmode = insn_data[icode].operand[0].mode;
14041 mode0 = insn_data[icode].operand[1].mode;
14042 mode1 = insn_data[icode].operand[2].mode;
14044 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14045 op0 = copy_to_mode_reg (mode0, op0);
14046 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14048 /* @@@ better error message */
14049 error ("selector must be an immediate");
14050 return gen_reg_rtx (tmode);
14053 || GET_MODE (target) != tmode
14054 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14055 target = gen_reg_rtx (tmode);
14056 pat = GEN_FCN (icode) (target, op0, op1);
14062 case IX86_BUILTIN_PINSRW:
14063 case IX86_BUILTIN_PINSRW128:
14064 icode = (fcode == IX86_BUILTIN_PINSRW
14065 ? CODE_FOR_mmx_pinsrw
14066 : CODE_FOR_sse2_pinsrw);
14067 arg0 = TREE_VALUE (arglist);
14068 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14069 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14070 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14071 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14072 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14073 tmode = insn_data[icode].operand[0].mode;
14074 mode0 = insn_data[icode].operand[1].mode;
14075 mode1 = insn_data[icode].operand[2].mode;
14076 mode2 = insn_data[icode].operand[3].mode;
14078 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14079 op0 = copy_to_mode_reg (mode0, op0);
14080 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14081 op1 = copy_to_mode_reg (mode1, op1);
14082 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14084 /* @@@ better error message */
14085 error ("selector must be an immediate");
14089 || GET_MODE (target) != tmode
14090 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14091 target = gen_reg_rtx (tmode);
14092 pat = GEN_FCN (icode) (target, op0, op1, op2);
14098 case IX86_BUILTIN_MASKMOVQ:
14099 case IX86_BUILTIN_MASKMOVDQU:
14100 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14101 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14102 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14103 : CODE_FOR_sse2_maskmovdqu));
14104 /* Note the arg order is different from the operand order. */
14105 arg1 = TREE_VALUE (arglist);
14106 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14107 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14108 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14109 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14110 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14111 mode0 = insn_data[icode].operand[0].mode;
14112 mode1 = insn_data[icode].operand[1].mode;
14113 mode2 = insn_data[icode].operand[2].mode;
14115 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14116 op0 = copy_to_mode_reg (mode0, op0);
14117 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14118 op1 = copy_to_mode_reg (mode1, op1);
14119 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14120 op2 = copy_to_mode_reg (mode2, op2);
14121 pat = GEN_FCN (icode) (op0, op1, op2);
14127 case IX86_BUILTIN_SQRTSS:
14128 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14129 case IX86_BUILTIN_RSQRTSS:
14130 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14131 case IX86_BUILTIN_RCPSS:
14132 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14134 case IX86_BUILTIN_LOADAPS:
14135 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14137 case IX86_BUILTIN_LOADUPS:
14138 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14140 case IX86_BUILTIN_STOREAPS:
14141 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14143 case IX86_BUILTIN_STOREUPS:
14144 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14146 case IX86_BUILTIN_LOADSS:
14147 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14149 case IX86_BUILTIN_STORESS:
14150 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14152 case IX86_BUILTIN_LOADHPS:
14153 case IX86_BUILTIN_LOADLPS:
14154 case IX86_BUILTIN_LOADHPD:
14155 case IX86_BUILTIN_LOADLPD:
14156 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14157 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14158 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14159 : CODE_FOR_sse2_movlpd);
14160 arg0 = TREE_VALUE (arglist);
14161 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14162 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14163 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14164 tmode = insn_data[icode].operand[0].mode;
14165 mode0 = insn_data[icode].operand[1].mode;
14166 mode1 = insn_data[icode].operand[2].mode;
14168 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14169 op0 = copy_to_mode_reg (mode0, op0);
14170 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14172 || GET_MODE (target) != tmode
14173 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14174 target = gen_reg_rtx (tmode);
14175 pat = GEN_FCN (icode) (target, op0, op1);
14181 case IX86_BUILTIN_STOREHPS:
14182 case IX86_BUILTIN_STORELPS:
14183 case IX86_BUILTIN_STOREHPD:
14184 case IX86_BUILTIN_STORELPD:
14185 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14186 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14187 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14188 : CODE_FOR_sse2_movlpd);
14189 arg0 = TREE_VALUE (arglist);
14190 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14191 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14192 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14193 mode0 = insn_data[icode].operand[1].mode;
14194 mode1 = insn_data[icode].operand[2].mode;
14196 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14197 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14198 op1 = copy_to_mode_reg (mode1, op1);
14200 pat = GEN_FCN (icode) (op0, op0, op1);
14206 case IX86_BUILTIN_MOVNTPS:
14207 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14208 case IX86_BUILTIN_MOVNTQ:
14209 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14211 case IX86_BUILTIN_LDMXCSR:
14212 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14213 target = assign_386_stack_local (SImode, 0);
14214 emit_move_insn (target, op0);
14215 emit_insn (gen_ldmxcsr (target));
14218 case IX86_BUILTIN_STMXCSR:
14219 target = assign_386_stack_local (SImode, 0);
14220 emit_insn (gen_stmxcsr (target));
14221 return copy_to_mode_reg (SImode, target);
14223 case IX86_BUILTIN_SHUFPS:
14224 case IX86_BUILTIN_SHUFPD:
14225 icode = (fcode == IX86_BUILTIN_SHUFPS
14226 ? CODE_FOR_sse_shufps
14227 : CODE_FOR_sse2_shufpd);
14228 arg0 = TREE_VALUE (arglist);
14229 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14230 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14231 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14232 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14233 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14234 tmode = insn_data[icode].operand[0].mode;
14235 mode0 = insn_data[icode].operand[1].mode;
14236 mode1 = insn_data[icode].operand[2].mode;
14237 mode2 = insn_data[icode].operand[3].mode;
14239 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14240 op0 = copy_to_mode_reg (mode0, op0);
14241 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14242 op1 = copy_to_mode_reg (mode1, op1);
14243 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14245 /* @@@ better error message */
14246 error ("mask must be an immediate");
14247 return gen_reg_rtx (tmode);
14250 || GET_MODE (target) != tmode
14251 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14252 target = gen_reg_rtx (tmode);
14253 pat = GEN_FCN (icode) (target, op0, op1, op2);
14259 case IX86_BUILTIN_PSHUFW:
14260 case IX86_BUILTIN_PSHUFD:
14261 case IX86_BUILTIN_PSHUFHW:
14262 case IX86_BUILTIN_PSHUFLW:
14263 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14264 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14265 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14266 : CODE_FOR_mmx_pshufw);
14267 arg0 = TREE_VALUE (arglist);
14268 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14269 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14270 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14271 tmode = insn_data[icode].operand[0].mode;
14272 mode1 = insn_data[icode].operand[1].mode;
14273 mode2 = insn_data[icode].operand[2].mode;
14275 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14276 op0 = copy_to_mode_reg (mode1, op0);
14277 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14279 /* @@@ better error message */
14280 error ("mask must be an immediate");
14284 || GET_MODE (target) != tmode
14285 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14286 target = gen_reg_rtx (tmode);
14287 pat = GEN_FCN (icode) (target, op0, op1);
14293 case IX86_BUILTIN_PSLLDQI128:
14294 case IX86_BUILTIN_PSRLDQI128:
14295 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14296 : CODE_FOR_sse2_lshrti3);
14297 arg0 = TREE_VALUE (arglist);
14298 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14299 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14300 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14301 tmode = insn_data[icode].operand[0].mode;
14302 mode1 = insn_data[icode].operand[1].mode;
14303 mode2 = insn_data[icode].operand[2].mode;
14305 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14307 op0 = copy_to_reg (op0);
14308 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14310 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14312 error ("shift must be an immediate");
14315 target = gen_reg_rtx (V2DImode);
14316 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14322 case IX86_BUILTIN_FEMMS:
14323 emit_insn (gen_femms ());
14326 case IX86_BUILTIN_PAVGUSB:
14327 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14329 case IX86_BUILTIN_PF2ID:
14330 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14332 case IX86_BUILTIN_PFACC:
14333 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14335 case IX86_BUILTIN_PFADD:
14336 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14338 case IX86_BUILTIN_PFCMPEQ:
14339 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14341 case IX86_BUILTIN_PFCMPGE:
14342 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14344 case IX86_BUILTIN_PFCMPGT:
14345 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14347 case IX86_BUILTIN_PFMAX:
14348 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14350 case IX86_BUILTIN_PFMIN:
14351 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14353 case IX86_BUILTIN_PFMUL:
14354 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14356 case IX86_BUILTIN_PFRCP:
14357 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14359 case IX86_BUILTIN_PFRCPIT1:
14360 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14362 case IX86_BUILTIN_PFRCPIT2:
14363 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14365 case IX86_BUILTIN_PFRSQIT1:
14366 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14368 case IX86_BUILTIN_PFRSQRT:
14369 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14371 case IX86_BUILTIN_PFSUB:
14372 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14374 case IX86_BUILTIN_PFSUBR:
14375 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14377 case IX86_BUILTIN_PI2FD:
14378 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14380 case IX86_BUILTIN_PMULHRW:
14381 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14383 case IX86_BUILTIN_PF2IW:
14384 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14386 case IX86_BUILTIN_PFNACC:
14387 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14389 case IX86_BUILTIN_PFPNACC:
14390 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14392 case IX86_BUILTIN_PI2FW:
14393 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14395 case IX86_BUILTIN_PSWAPDSI:
14396 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14398 case IX86_BUILTIN_PSWAPDSF:
14399 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14401 case IX86_BUILTIN_SSE_ZERO:
14402 target = gen_reg_rtx (V4SFmode);
14403 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14406 case IX86_BUILTIN_MMX_ZERO:
14407 target = gen_reg_rtx (DImode);
14408 emit_insn (gen_mmx_clrdi (target));
14411 case IX86_BUILTIN_CLRTI:
14412 target = gen_reg_rtx (V2DImode);
14413 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14417 case IX86_BUILTIN_SQRTSD:
14418 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14419 case IX86_BUILTIN_LOADAPD:
14420 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14421 case IX86_BUILTIN_LOADUPD:
14422 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14424 case IX86_BUILTIN_STOREAPD:
14425 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14426 case IX86_BUILTIN_STOREUPD:
14427 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14429 case IX86_BUILTIN_LOADSD:
14430 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14432 case IX86_BUILTIN_STORESD:
14433 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14435 case IX86_BUILTIN_SETPD1:
14436 target = assign_386_stack_local (DFmode, 0);
14437 arg0 = TREE_VALUE (arglist);
14438 emit_move_insn (adjust_address (target, DFmode, 0),
14439 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14440 op0 = gen_reg_rtx (V2DFmode);
14441 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14442 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14445 case IX86_BUILTIN_SETPD:
14446 target = assign_386_stack_local (V2DFmode, 0);
14447 arg0 = TREE_VALUE (arglist);
14448 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14449 emit_move_insn (adjust_address (target, DFmode, 0),
14450 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14451 emit_move_insn (adjust_address (target, DFmode, 8),
14452 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14453 op0 = gen_reg_rtx (V2DFmode);
14454 emit_insn (gen_sse2_movapd (op0, target));
14457 case IX86_BUILTIN_LOADRPD:
14458 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14459 gen_reg_rtx (V2DFmode), 1);
14460 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14463 case IX86_BUILTIN_LOADPD1:
14464 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14465 gen_reg_rtx (V2DFmode), 1);
14466 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14469 case IX86_BUILTIN_STOREPD1:
14470 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14471 case IX86_BUILTIN_STORERPD:
14472 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14474 case IX86_BUILTIN_CLRPD:
14475 target = gen_reg_rtx (V2DFmode);
14476 emit_insn (gen_sse_clrv2df (target));
14479 case IX86_BUILTIN_MFENCE:
14480 emit_insn (gen_sse2_mfence ());
14482 case IX86_BUILTIN_LFENCE:
14483 emit_insn (gen_sse2_lfence ());
14486 case IX86_BUILTIN_CLFLUSH:
14487 arg0 = TREE_VALUE (arglist);
14488 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14489 icode = CODE_FOR_sse2_clflush;
14490 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14491 op0 = copy_to_mode_reg (Pmode, op0);
14493 emit_insn (gen_sse2_clflush (op0));
14496 case IX86_BUILTIN_MOVNTPD:
14497 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14498 case IX86_BUILTIN_MOVNTDQ:
14499 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14500 case IX86_BUILTIN_MOVNTI:
14501 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14503 case IX86_BUILTIN_LOADDQA:
14504 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14505 case IX86_BUILTIN_LOADDQU:
14506 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14507 case IX86_BUILTIN_LOADD:
14508 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14510 case IX86_BUILTIN_STOREDQA:
14511 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14512 case IX86_BUILTIN_STOREDQU:
14513 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14514 case IX86_BUILTIN_STORED:
14515 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14521 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14522 if (d->code == fcode)
14524 /* Compares are treated specially. */
14525 if (d->icode == CODE_FOR_maskcmpv4sf3
14526 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14527 || d->icode == CODE_FOR_maskncmpv4sf3
14528 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14529 || d->icode == CODE_FOR_maskcmpv2df3
14530 || d->icode == CODE_FOR_vmmaskcmpv2df3
14531 || d->icode == CODE_FOR_maskncmpv2df3
14532 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14533 return ix86_expand_sse_compare (d, arglist, target);
14535 return ix86_expand_binop_builtin (d->icode, arglist, target);
14538 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14539 if (d->code == fcode)
14540 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14542 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14543 if (d->code == fcode)
14544 return ix86_expand_sse_comi (d, arglist, target);
14546 /* @@@ Should really do something sensible here. */
14550 /* Store OPERAND to the memory after reload is completed. This means
14551 that we can't easily use assign_stack_local. */
14553 ix86_force_to_memory (mode, operand)
14554 enum machine_mode mode;
14558 if (!reload_completed)
14560 if (TARGET_RED_ZONE)
14562 result = gen_rtx_MEM (mode,
14563 gen_rtx_PLUS (Pmode,
14565 GEN_INT (-RED_ZONE_SIZE)));
14566 emit_move_insn (result, operand);
14568 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14574 operand = gen_lowpart (DImode, operand);
14578 gen_rtx_SET (VOIDmode,
14579 gen_rtx_MEM (DImode,
14580 gen_rtx_PRE_DEC (DImode,
14581 stack_pointer_rtx)),
14587 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14596 split_di (&operand, 1, operands, operands + 1);
14598 gen_rtx_SET (VOIDmode,
14599 gen_rtx_MEM (SImode,
14600 gen_rtx_PRE_DEC (Pmode,
14601 stack_pointer_rtx)),
14604 gen_rtx_SET (VOIDmode,
14605 gen_rtx_MEM (SImode,
14606 gen_rtx_PRE_DEC (Pmode,
14607 stack_pointer_rtx)),
14612 /* It is better to store HImodes as SImodes. */
14613 if (!TARGET_PARTIAL_REG_STALL)
14614 operand = gen_lowpart (SImode, operand);
14618 gen_rtx_SET (VOIDmode,
14619 gen_rtx_MEM (GET_MODE (operand),
14620 gen_rtx_PRE_DEC (SImode,
14621 stack_pointer_rtx)),
14627 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14632 /* Free operand from the memory. */
14634 ix86_free_from_memory (mode)
14635 enum machine_mode mode;
14637 if (!TARGET_RED_ZONE)
14641 if (mode == DImode || TARGET_64BIT)
14643 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14647 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14648 to pop or add instruction if registers are available. */
14649 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14650 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14655 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14656 QImode must go into class Q_REGS.
14657 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14658 movdf to do mem-to-mem moves through integer regs. */
14660 ix86_preferred_reload_class (x, class)
14662 enum reg_class class;
14664 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14666 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14668 /* SSE can't load any constant directly yet. */
14669 if (SSE_CLASS_P (class))
14671 /* Floats can load 0 and 1. */
14672 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14674 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14675 if (MAYBE_SSE_CLASS_P (class))
14676 return (reg_class_subset_p (class, GENERAL_REGS)
14677 ? GENERAL_REGS : FLOAT_REGS);
14681 /* General regs can load everything. */
14682 if (reg_class_subset_p (class, GENERAL_REGS))
14683 return GENERAL_REGS;
14684 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14685 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14688 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14690 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14695 /* If we are copying between general and FP registers, we need a memory
14696 location. The same is true for SSE and MMX registers.
14698 The macro can't work reliably when one of the CLASSES is class containing
14699 registers from multiple units (SSE, MMX, integer). We avoid this by never
14700 combining those units in single alternative in the machine description.
14701 Ensure that this constraint holds to avoid unexpected surprises.
14703 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14704 enforce these sanity checks. */
14706 ix86_secondary_memory_needed (class1, class2, mode, strict)
14707 enum reg_class class1, class2;
14708 enum machine_mode mode;
14711 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14712 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14713 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14714 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14715 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14716 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14723 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14724 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14725 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14726 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14727 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14729 /* Return the cost of moving data from a register in class CLASS1 to
14730 one in class CLASS2.
14732 It is not required that the cost always equal 2 when FROM is the same as TO;
14733 on some machines it is expensive to move between registers if they are not
14734 general registers. */
14736 ix86_register_move_cost (mode, class1, class2)
14737 enum machine_mode mode;
14738 enum reg_class class1, class2;
14740 /* In case we require secondary memory, compute cost of the store followed
14741 by load. In order to avoid bad register allocation choices, we need
14742 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14744 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14748 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14749 MEMORY_MOVE_COST (mode, class1, 1));
14750 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14751 MEMORY_MOVE_COST (mode, class2, 1));
14753 /* In case of copying from general_purpose_register we may emit multiple
14754 stores followed by single load causing memory size mismatch stall.
14755 Count this as arbitrarily high cost of 20. */
14756 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14759 /* In the case of FP/MMX moves, the registers actually overlap, and we
14760 have to switch modes in order to treat them differently. */
14761 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14762 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14768 /* Moves between SSE/MMX and integer unit are expensive. */
14769 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14770 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14771 return ix86_cost->mmxsse_to_integer;
14772 if (MAYBE_FLOAT_CLASS_P (class1))
14773 return ix86_cost->fp_move;
14774 if (MAYBE_SSE_CLASS_P (class1))
14775 return ix86_cost->sse_move;
14776 if (MAYBE_MMX_CLASS_P (class1))
14777 return ix86_cost->mmx_move;
14781 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14783 ix86_hard_regno_mode_ok (regno, mode)
14785 enum machine_mode mode;
14787 /* Flags and only flags can only hold CCmode values. */
14788 if (CC_REGNO_P (regno))
14789 return GET_MODE_CLASS (mode) == MODE_CC;
14790 if (GET_MODE_CLASS (mode) == MODE_CC
14791 || GET_MODE_CLASS (mode) == MODE_RANDOM
14792 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14794 if (FP_REGNO_P (regno))
14795 return VALID_FP_MODE_P (mode);
14796 if (SSE_REGNO_P (regno))
14797 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14798 if (MMX_REGNO_P (regno))
14800 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14801 /* We handle both integer and floats in the general purpose registers.
14802 In future we should be able to handle vector modes as well. */
14803 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14805 /* Take care for QImode values - they can be in non-QI regs, but then
14806 they do cause partial register stalls. */
14807 if (regno < 4 || mode != QImode || TARGET_64BIT)
14809 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14812 /* Return the cost of moving data of mode M between a
14813 register and memory. A value of 2 is the default; this cost is
14814 relative to those in `REGISTER_MOVE_COST'.
14816 If moving between registers and memory is more expensive than
14817 between two registers, you should define this macro to express the
14820 Model also increased moving costs of QImode registers in non
14824 ix86_memory_move_cost (mode, class, in)
14825 enum machine_mode mode;
14826 enum reg_class class;
14829 if (FLOAT_CLASS_P (class))
14847 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14849 if (SSE_CLASS_P (class))
14852 switch (GET_MODE_SIZE (mode))
14866 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14868 if (MMX_CLASS_P (class))
14871 switch (GET_MODE_SIZE (mode))
14882 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14884 switch (GET_MODE_SIZE (mode))
14888 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14889 : ix86_cost->movzbl_load);
14891 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14892 : ix86_cost->int_store[0] + 4);
14895 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14897 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14898 if (mode == TFmode)
14900 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14901 * (((int) GET_MODE_SIZE (mode)
14902 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14906 /* Compute a (partial) cost for rtx X. Return true if the complete
14907 cost has been computed, and false if subexpressions should be
14908 scanned. In either case, *TOTAL contains the cost result. */
14911 ix86_rtx_costs (x, code, outer_code, total)
14913 int code, outer_code;
14916 enum machine_mode mode = GET_MODE (x);
14924 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14926 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14928 else if (flag_pic && SYMBOLIC_CONST (x)
14930 || (!GET_CODE (x) != LABEL_REF
14931 && (GET_CODE (x) != SYMBOL_REF
14932 || !SYMBOL_REF_LOCAL_P (x)))))
14939 if (mode == VOIDmode)
14942 switch (standard_80387_constant_p (x))
14947 default: /* Other constants */
14952 /* Start with (MEM (SYMBOL_REF)), since that's where
14953 it'll probably end up. Add a penalty for size. */
14954 *total = (COSTS_N_INSNS (1)
14955 + (flag_pic != 0 && !TARGET_64BIT)
14956 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14962 /* The zero extensions is often completely free on x86_64, so make
14963 it as cheap as possible. */
14964 if (TARGET_64BIT && mode == DImode
14965 && GET_MODE (XEXP (x, 0)) == SImode)
14967 else if (TARGET_ZERO_EXTEND_WITH_AND)
14968 *total = COSTS_N_INSNS (ix86_cost->add);
14970 *total = COSTS_N_INSNS (ix86_cost->movzx);
14974 *total = COSTS_N_INSNS (ix86_cost->movsx);
14978 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14979 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14981 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14984 *total = COSTS_N_INSNS (ix86_cost->add);
14987 if ((value == 2 || value == 3)
14988 && !TARGET_DECOMPOSE_LEA
14989 && ix86_cost->lea <= ix86_cost->shift_const)
14991 *total = COSTS_N_INSNS (ix86_cost->lea);
15001 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15003 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15005 if (INTVAL (XEXP (x, 1)) > 32)
15006 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15008 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15012 if (GET_CODE (XEXP (x, 1)) == AND)
15013 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15015 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15020 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15021 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15023 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15028 if (FLOAT_MODE_P (mode))
15029 *total = COSTS_N_INSNS (ix86_cost->fmul);
15030 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15032 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15035 for (nbits = 0; value != 0; value >>= 1)
15038 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15039 + nbits * ix86_cost->mult_bit);
15043 /* This is arbitrary */
15044 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15045 + 7 * ix86_cost->mult_bit);
15053 if (FLOAT_MODE_P (mode))
15054 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15056 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15060 if (FLOAT_MODE_P (mode))
15061 *total = COSTS_N_INSNS (ix86_cost->fadd);
15062 else if (!TARGET_DECOMPOSE_LEA
15063 && GET_MODE_CLASS (mode) == MODE_INT
15064 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15066 if (GET_CODE (XEXP (x, 0)) == PLUS
15067 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15068 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15069 && CONSTANT_P (XEXP (x, 1)))
15071 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15072 if (val == 2 || val == 4 || val == 8)
15074 *total = COSTS_N_INSNS (ix86_cost->lea);
15075 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15076 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15078 *total += rtx_cost (XEXP (x, 1), outer_code);
15082 else if (GET_CODE (XEXP (x, 0)) == MULT
15083 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15085 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15086 if (val == 2 || val == 4 || val == 8)
15088 *total = COSTS_N_INSNS (ix86_cost->lea);
15089 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15090 *total += rtx_cost (XEXP (x, 1), outer_code);
15094 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15096 *total = COSTS_N_INSNS (ix86_cost->lea);
15097 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15098 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15099 *total += rtx_cost (XEXP (x, 1), outer_code);
15106 if (FLOAT_MODE_P (mode))
15108 *total = COSTS_N_INSNS (ix86_cost->fadd);
15116 if (!TARGET_64BIT && mode == DImode)
15118 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15119 + (rtx_cost (XEXP (x, 0), outer_code)
15120 << (GET_MODE (XEXP (x, 0)) != DImode))
15121 + (rtx_cost (XEXP (x, 1), outer_code)
15122 << (GET_MODE (XEXP (x, 1)) != DImode)));
15128 if (FLOAT_MODE_P (mode))
15130 *total = COSTS_N_INSNS (ix86_cost->fchs);
15136 if (!TARGET_64BIT && mode == DImode)
15137 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15139 *total = COSTS_N_INSNS (ix86_cost->add);
15143 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15148 if (FLOAT_MODE_P (mode))
15149 *total = COSTS_N_INSNS (ix86_cost->fabs);
15153 if (FLOAT_MODE_P (mode))
15154 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15158 if (XINT (x, 1) == UNSPEC_TP)
15167 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15169 ix86_svr3_asm_out_constructor (symbol, priority)
15171 int priority ATTRIBUTE_UNUSED;
15174 fputs ("\tpushl $", asm_out_file);
15175 assemble_name (asm_out_file, XSTR (symbol, 0));
15176 fputc ('\n', asm_out_file);
15182 static int current_machopic_label_num;
15184 /* Given a symbol name and its associated stub, write out the
15185 definition of the stub. */
15188 machopic_output_stub (file, symb, stub)
15190 const char *symb, *stub;
15192 unsigned int length;
15193 char *binder_name, *symbol_name, lazy_ptr_name[32];
15194 int label = ++current_machopic_label_num;
15196 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15197 symb = (*targetm.strip_name_encoding) (symb);
15199 length = strlen (stub);
15200 binder_name = alloca (length + 32);
15201 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15203 length = strlen (symb);
15204 symbol_name = alloca (length + 32);
15205 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15207 sprintf (lazy_ptr_name, "L%d$lz", label);
15210 machopic_picsymbol_stub_section ();
15212 machopic_symbol_stub_section ();
15214 fprintf (file, "%s:\n", stub);
15215 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15219 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15220 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15221 fprintf (file, "\tjmp %%edx\n");
15224 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15226 fprintf (file, "%s:\n", binder_name);
15230 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15231 fprintf (file, "\tpushl %%eax\n");
15234 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15236 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15238 machopic_lazy_symbol_ptr_section ();
15239 fprintf (file, "%s:\n", lazy_ptr_name);
15240 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15241 fprintf (file, "\t.long %s\n", binder_name);
15243 #endif /* TARGET_MACHO */
15245 /* Order the registers for register allocator. */
15248 x86_order_regs_for_local_alloc ()
15253 /* First allocate the local general purpose registers. */
15254 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15255 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15256 reg_alloc_order [pos++] = i;
15258 /* Global general purpose registers. */
15259 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15260 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15261 reg_alloc_order [pos++] = i;
15263 /* x87 registers come first in case we are doing FP math
15265 if (!TARGET_SSE_MATH)
15266 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15267 reg_alloc_order [pos++] = i;
15269 /* SSE registers. */
15270 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15271 reg_alloc_order [pos++] = i;
15272 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15273 reg_alloc_order [pos++] = i;
15275 /* x87 registers. */
15276 if (TARGET_SSE_MATH)
15277 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15278 reg_alloc_order [pos++] = i;
15280 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15281 reg_alloc_order [pos++] = i;
15283 /* Initialize the rest of array as we do not allocate some registers
15285 while (pos < FIRST_PSEUDO_REGISTER)
15286 reg_alloc_order [pos++] = 0;
15289 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15290 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15293 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15294 struct attribute_spec.handler. */
15296 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
15299 tree args ATTRIBUTE_UNUSED;
15300 int flags ATTRIBUTE_UNUSED;
15301 bool *no_add_attrs;
15304 if (DECL_P (*node))
15306 if (TREE_CODE (*node) == TYPE_DECL)
15307 type = &TREE_TYPE (*node);
15312 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15313 || TREE_CODE (*type) == UNION_TYPE)))
15315 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15316 *no_add_attrs = true;
15319 else if ((is_attribute_p ("ms_struct", name)
15320 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15321 || ((is_attribute_p ("gcc_struct", name)
15322 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15324 warning ("`%s' incompatible attribute ignored",
15325 IDENTIFIER_POINTER (name));
15326 *no_add_attrs = true;
15333 ix86_ms_bitfield_layout_p (record_type)
15336 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15337 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15338 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15341 /* Returns an expression indicating where the this parameter is
15342 located on entry to the FUNCTION. */
15345 x86_this_parameter (function)
15348 tree type = TREE_TYPE (function);
15352 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15353 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15356 if (ix86_fntype_regparm (type) > 0)
15360 parm = TYPE_ARG_TYPES (type);
15361 /* Figure out whether or not the function has a variable number of
15363 for (; parm; parm = TREE_CHAIN (parm))
15364 if (TREE_VALUE (parm) == void_type_node)
15366 /* If not, the this parameter is in %eax. */
15368 return gen_rtx_REG (SImode, 0);
15371 if (aggregate_value_p (TREE_TYPE (type)))
15372 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15374 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15377 /* Determine whether x86_output_mi_thunk can succeed. */
15380 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15381 tree thunk ATTRIBUTE_UNUSED;
15382 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15383 HOST_WIDE_INT vcall_offset;
15386 /* 64-bit can handle anything. */
15390 /* For 32-bit, everything's fine if we have one free register. */
15391 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15394 /* Need a free register for vcall_offset. */
15398 /* Need a free register for GOT references. */
15399 if (flag_pic && !(*targetm.binds_local_p) (function))
15402 /* Otherwise ok. */
15406 /* Output the assembler code for a thunk function. THUNK_DECL is the
15407 declaration for the thunk function itself, FUNCTION is the decl for
15408 the target function. DELTA is an immediate constant offset to be
15409 added to THIS. If VCALL_OFFSET is nonzero, the word at
15410 *(*this + vcall_offset) should be added to THIS. */
15413 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15414 FILE *file ATTRIBUTE_UNUSED;
15415 tree thunk ATTRIBUTE_UNUSED;
15416 HOST_WIDE_INT delta;
15417 HOST_WIDE_INT vcall_offset;
15421 rtx this = x86_this_parameter (function);
15424 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15425 pull it in now and let DELTA benefit. */
15428 else if (vcall_offset)
15430 /* Put the this parameter into %eax. */
15432 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15433 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15436 this_reg = NULL_RTX;
15438 /* Adjust the this parameter by a fixed constant. */
15441 xops[0] = GEN_INT (delta);
15442 xops[1] = this_reg ? this_reg : this;
15445 if (!x86_64_general_operand (xops[0], DImode))
15447 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15449 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15453 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15456 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15459 /* Adjust the this parameter by a value stored in the vtable. */
15463 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15465 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15467 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15470 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15472 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15474 /* Adjust the this parameter. */
15475 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15476 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15478 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15479 xops[0] = GEN_INT (vcall_offset);
15481 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15482 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15484 xops[1] = this_reg;
15486 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15488 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15491 /* If necessary, drop THIS back to its stack slot. */
15492 if (this_reg && this_reg != this)
15494 xops[0] = this_reg;
15496 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15499 xops[0] = XEXP (DECL_RTL (function), 0);
15502 if (!flag_pic || (*targetm.binds_local_p) (function))
15503 output_asm_insn ("jmp\t%P0", xops);
15506 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15507 tmp = gen_rtx_CONST (Pmode, tmp);
15508 tmp = gen_rtx_MEM (QImode, tmp);
15510 output_asm_insn ("jmp\t%A0", xops);
15515 if (!flag_pic || (*targetm.binds_local_p) (function))
15516 output_asm_insn ("jmp\t%P0", xops);
15521 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15522 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15523 tmp = gen_rtx_MEM (QImode, tmp);
15525 output_asm_insn ("jmp\t%0", xops);
15528 #endif /* TARGET_MACHO */
15530 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15531 output_set_got (tmp);
15534 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15535 output_asm_insn ("jmp\t{*}%1", xops);
15543 default_file_start ();
15544 if (X86_FILE_START_VERSION_DIRECTIVE)
15545 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15546 if (X86_FILE_START_FLTUSED)
15547 fputs ("\t.global\t__fltused\n", asm_out_file);
15548 if (ix86_asm_dialect == ASM_INTEL)
15549 fputs ("\t.intel_syntax\n", asm_out_file);
15553 x86_field_alignment (field, computed)
15557 enum machine_mode mode;
15558 tree type = TREE_TYPE (field);
15560 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15562 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15563 ? get_inner_array_type (type) : type);
15564 if (mode == DFmode || mode == DCmode
15565 || GET_MODE_CLASS (mode) == MODE_INT
15566 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15567 return MIN (32, computed);
15571 /* Output assembler code to FILE to increment profiler label # LABELNO
15572 for profiling a function entry. */
15574 x86_function_profiler (file, labelno)
15576 int labelno ATTRIBUTE_UNUSED;
15581 #ifndef NO_PROFILE_COUNTERS
15582 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15584 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15588 #ifndef NO_PROFILE_COUNTERS
15589 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15591 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15595 #ifndef NO_PROFILE_COUNTERS
15596 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15597 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15599 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15603 #ifndef NO_PROFILE_COUNTERS
15604 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15605 PROFILE_COUNT_REGISTER);
15607 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15611 /* We don't have exact information about the insn sizes, but we may assume
15612 quite safely that we are informed about all 1 byte insns and memory
15613 address sizes. This is enought to elliminate unnecesary padding in
15617 min_insn_size (insn)
15622 if (!INSN_P (insn) || !active_insn_p (insn))
15625 /* Discard alignments we've emit and jump instructions. */
15626 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15627 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15629 if (GET_CODE (insn) == JUMP_INSN
15630 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15631 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15634 /* Important case - calls are always 5 bytes.
15635 It is common to have many calls in the row. */
15636 if (GET_CODE (insn) == CALL_INSN
15637 && symbolic_reference_mentioned_p (PATTERN (insn))
15638 && !SIBLING_CALL_P (insn))
15640 if (get_attr_length (insn) <= 1)
15643 /* For normal instructions we may rely on the sizes of addresses
15644 and the presence of symbol to require 4 bytes of encoding.
15645 This is not the case for jumps where references are PC relative. */
15646 if (GET_CODE (insn) != JUMP_INSN)
15648 l = get_attr_length_address (insn);
15649 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15658 /* AMD K8 core misspredicts jumps when there are more than 3 jumps in 16 byte
15662 k8_avoid_jump_misspredicts ()
15664 rtx insn, start = get_insns ();
15665 int nbytes = 0, njumps = 0;
15668 /* Look for all minimal intervals of instructions containing 4 jumps.
15669 The intervals are bounded by START and INSN. NBYTES is the total
15670 size of instructions in the interval including INSN and not including
15671 START. When the NBYTES is smaller than 16 bytes, it is possible
15672 that the end of START and INSN ends up in the same 16byte page.
15674 The smallest offset in the page INSN can start is the case where START
15675 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15676 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15678 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15681 nbytes += min_insn_size (insn);
15683 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15684 INSN_UID (insn), min_insn_size (insn));
15685 if ((GET_CODE (insn) == JUMP_INSN
15686 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15687 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15688 || GET_CODE (insn) == CALL_INSN)
15695 start = NEXT_INSN (start);
15696 if ((GET_CODE (start) == JUMP_INSN
15697 && GET_CODE (PATTERN (start)) != ADDR_VEC
15698 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15699 || GET_CODE (start) == CALL_INSN)
15700 njumps--, isjump = 1;
15703 nbytes -= min_insn_size (start);
15708 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15709 INSN_UID (start), INSN_UID (insn), nbytes);
15711 if (njumps == 3 && isjump && nbytes < 16)
15713 int padsize = 15 - nbytes + min_insn_size (insn);
15716 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15717 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15722 /* Implement machine specific optimizations.
15723 At the moment we implement single transformation: AMD Athlon works faster
15724 when RET is not destination of conditional jump or directly preceded
15725 by other jump instruction. We avoid the penalty by inserting NOP just
15726 before the RET instructions in such cases. */
15732 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15734 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15736 basic_block bb = e->src;
15739 bool replace = false;
15741 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15742 || !maybe_hot_bb_p (bb))
15744 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15745 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15747 if (prev && GET_CODE (prev) == CODE_LABEL)
15750 for (e = bb->pred; e; e = e->pred_next)
15751 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15752 && !(e->flags & EDGE_FALLTHRU))
15757 prev = prev_active_insn (ret);
15759 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15760 || GET_CODE (prev) == CALL_INSN))
15762 /* Empty functions get branch misspredict even when the jump destination
15763 is not visible to us. */
15764 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15769 emit_insn_before (gen_return_internal_long (), ret);
15773 k8_avoid_jump_misspredicts ();
15776 /* Return nonzero when QImode register that must be represented via REX prefix
15779 x86_extended_QIreg_mentioned_p (insn)
15783 extract_insn_cached (insn);
15784 for (i = 0; i < recog_data.n_operands; i++)
15785 if (REG_P (recog_data.operand[i])
15786 && REGNO (recog_data.operand[i]) >= 4)
15791 /* Return nonzero when P points to register encoded via REX prefix.
15792 Called via for_each_rtx. */
15794 extended_reg_mentioned_1 (p, data)
15796 void *data ATTRIBUTE_UNUSED;
15798 unsigned int regno;
15801 regno = REGNO (*p);
15802 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15805 /* Return true when INSN mentions register that must be encoded using REX
15808 x86_extended_reg_mentioned_p (insn)
15811 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15814 /* Generate an unsigned DImode to FP conversion. This is the same code
15815 optabs would emit if we didn't have TFmode patterns. */
15818 x86_emit_floatuns (operands)
15821 rtx neglab, donelab, i0, i1, f0, in, out;
15822 enum machine_mode mode;
15825 in = force_reg (DImode, operands[1]);
15826 mode = GET_MODE (out);
15827 neglab = gen_label_rtx ();
15828 donelab = gen_label_rtx ();
15829 i1 = gen_reg_rtx (Pmode);
15830 f0 = gen_reg_rtx (mode);
15832 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15834 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15835 emit_jump_insn (gen_jump (donelab));
15838 emit_label (neglab);
15840 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15841 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15842 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15843 expand_float (f0, i0, 0);
15844 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15846 emit_label (donelab);
15849 /* Return if we do not know how to pass TYPE solely in registers. */
15851 ix86_must_pass_in_stack (mode, type)
15852 enum machine_mode mode;
15855 if (default_must_pass_in_stack (mode, type))
15857 return (!TARGET_64BIT && type && mode == TImode);
15860 #include "gt-i386.h"