1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
527 /* Some CPU cores are not able to predict more than 4 branch instructions in
528 the 16 byte window. */
529 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4;
531 /* In case the average insn count for single function invocation is
532 lower than this constant, emit fast (but longer) prologue and
534 #define FAST_PROLOGUE_INSN_COUNT 20
536 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
537 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
538 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
539 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
541 /* Array of the smallest class containing reg number REGNO, indexed by
542 REGNO. Used by REGNO_REG_CLASS in i386.h. */
544 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
547 AREG, DREG, CREG, BREG,
549 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
551 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
552 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
555 /* flags, fpsr, dirflag, frame */
556 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
557 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
559 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
563 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
567 /* The "default" register map used in 32bit mode. */
569 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
571 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
572 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
573 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
574 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
575 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
580 static int const x86_64_int_parameter_registers[6] =
582 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
583 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
586 static int const x86_64_int_return_registers[4] =
588 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
591 /* The "default" register map used in 64bit mode. */
592 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
594 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
595 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
596 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
597 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
598 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
599 8,9,10,11,12,13,14,15, /* extended integer registers */
600 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
603 /* Define the register numbers to be used in Dwarf debugging information.
604 The SVR4 reference port C compiler uses the following register numbers
605 in its Dwarf output code:
606 0 for %eax (gcc regno = 0)
607 1 for %ecx (gcc regno = 2)
608 2 for %edx (gcc regno = 1)
609 3 for %ebx (gcc regno = 3)
610 4 for %esp (gcc regno = 7)
611 5 for %ebp (gcc regno = 6)
612 6 for %esi (gcc regno = 4)
613 7 for %edi (gcc regno = 5)
614 The following three DWARF register numbers are never generated by
615 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
616 believes these numbers have these meanings.
617 8 for %eip (no gcc equivalent)
618 9 for %eflags (gcc regno = 17)
619 10 for %trapno (no gcc equivalent)
620 It is not at all clear how we should number the FP stack registers
621 for the x86 architecture. If the version of SDB on x86/svr4 were
622 a bit less brain dead with respect to floating-point then we would
623 have a precedent to follow with respect to DWARF register numbers
624 for x86 FP registers, but the SDB on x86/svr4 is so completely
625 broken with respect to FP registers that it is hardly worth thinking
626 of it as something to strive for compatibility with.
627 The version of x86/svr4 SDB I have at the moment does (partially)
628 seem to believe that DWARF register number 11 is associated with
629 the x86 register %st(0), but that's about all. Higher DWARF
630 register numbers don't seem to be associated with anything in
631 particular, and even for DWARF regno 11, SDB only seems to under-
632 stand that it should say that a variable lives in %st(0) (when
633 asked via an `=' command) if we said it was in DWARF regno 11,
634 but SDB still prints garbage when asked for the value of the
635 variable in question (via a `/' command).
636 (Also note that the labels SDB prints for various FP stack regs
637 when doing an `x' command are all wrong.)
638 Note that these problems generally don't affect the native SVR4
639 C compiler because it doesn't allow the use of -O with -g and
640 because when it is *not* optimizing, it allocates a memory
641 location for each floating-point variable, and the memory
642 location is what gets described in the DWARF AT_location
643 attribute for the variable in question.
644 Regardless of the severe mental illness of the x86/svr4 SDB, we
645 do something sensible here and we use the following DWARF
646 register numbers. Note that these are all stack-top-relative
648 11 for %st(0) (gcc regno = 8)
649 12 for %st(1) (gcc regno = 9)
650 13 for %st(2) (gcc regno = 10)
651 14 for %st(3) (gcc regno = 11)
652 15 for %st(4) (gcc regno = 12)
653 16 for %st(5) (gcc regno = 13)
654 17 for %st(6) (gcc regno = 14)
655 18 for %st(7) (gcc regno = 15)
657 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
659 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
660 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
661 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
662 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
663 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
668 /* Test and compare insns in i386.md store the information needed to
669 generate branch and scc insns here. */
671 rtx ix86_compare_op0 = NULL_RTX;
672 rtx ix86_compare_op1 = NULL_RTX;
674 #define MAX_386_STACK_LOCALS 3
675 /* Size of the register save area. */
676 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
678 /* Define the structure for the machine field in struct function. */
680 struct stack_local_entry GTY(())
685 struct stack_local_entry *next;
688 /* Structure describing stack frame layout.
689 Stack grows downward:
695 saved frame pointer if frame_pointer_needed
696 <- HARD_FRAME_POINTER
702 > to_allocate <- FRAME_POINTER
714 int outgoing_arguments_size;
717 HOST_WIDE_INT to_allocate;
718 /* The offsets relative to ARG_POINTER. */
719 HOST_WIDE_INT frame_pointer_offset;
720 HOST_WIDE_INT hard_frame_pointer_offset;
721 HOST_WIDE_INT stack_pointer_offset;
723 /* When save_regs_using_mov is set, emit prologue using
724 move instead of push instructions. */
725 bool save_regs_using_mov;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
733 enum cmodel ix86_cmodel;
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_tune;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_tune_string; /* for -mtune=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand (rtx, enum machine_mode);
789 static int tls_symbolic_operand_1 (rtx, enum tls_model);
790 static void output_pic_addr_const (FILE *, rtx, int);
791 static void put_condition_code (enum rtx_code, enum machine_mode,
793 static const char *get_some_local_dynamic_name (void);
794 static int get_some_local_dynamic_name_1 (rtx *, void *);
795 static rtx maybe_get_pool_constant (rtx);
796 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
797 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
799 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
800 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
802 static rtx get_thread_pointer (int);
803 static rtx legitimize_tls_address (rtx, enum tls_model, int);
804 static void get_pc_thunk_name (char [32], unsigned int);
805 static rtx gen_push (rtx);
806 static int memory_address_length (rtx addr);
807 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
808 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
809 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
810 static void ix86_dump_ppro_packet (FILE *);
811 static void ix86_reorder_insn (rtx *, rtx *);
812 static struct machine_function * ix86_init_machine_status (void);
813 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
814 static int ix86_nsaved_regs (void);
815 static void ix86_emit_save_regs (void);
816 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
817 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
818 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
819 static void ix86_sched_reorder_ppro (rtx *, rtx *);
820 static HOST_WIDE_INT ix86_GOT_alias_set (void);
821 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
822 static rtx ix86_expand_aligntest (rtx, int);
823 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
824 static int ix86_issue_rate (void);
825 static int ix86_adjust_cost (rtx, rtx, rtx, int);
826 static void ix86_sched_init (FILE *, int, int);
827 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
828 static int ix86_variable_issue (FILE *, int, rtx, int);
829 static int ia32_use_dfa_pipeline_interface (void);
830 static int ia32_multipass_dfa_lookahead (void);
831 static void ix86_init_mmx_sse_builtins (void);
832 static rtx x86_this_parameter (tree);
833 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
834 HOST_WIDE_INT, tree);
835 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
836 static void x86_file_start (void);
837 static void ix86_reorg (void);
838 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
839 static tree ix86_build_builtin_va_list (void);
840 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
845 rtx base, index, disp;
847 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
850 static int ix86_decompose_address (rtx, struct ix86_address *);
851 static int ix86_address_cost (rtx);
852 static bool ix86_cannot_force_const_mem (rtx);
853 static rtx ix86_delegitimize_address (rtx);
855 struct builtin_description;
856 static rtx ix86_expand_sse_comi (const struct builtin_description *,
858 static rtx ix86_expand_sse_compare (const struct builtin_description *,
860 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
861 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
862 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
863 static rtx ix86_expand_store_builtin (enum insn_code, tree);
864 static rtx safe_vector_operand (rtx, enum machine_mode);
865 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
866 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
867 enum rtx_code *, enum rtx_code *);
868 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
869 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
870 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
871 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
872 static int ix86_fp_comparison_cost (enum rtx_code code);
873 static unsigned int ix86_select_alt_pic_regnum (void);
874 static int ix86_save_reg (unsigned int, int);
875 static void ix86_compute_frame_layout (struct ix86_frame *);
876 static int ix86_comp_type_attributes (tree, tree);
877 static int ix86_function_regparm (tree, tree);
878 const struct attribute_spec ix86_attribute_table[];
879 static bool ix86_function_ok_for_sibcall (tree, tree);
880 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
881 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
882 static int ix86_value_regno (enum machine_mode);
883 static bool contains_128bit_aligned_vector_p (tree);
884 static bool ix86_ms_bitfield_layout_p (tree);
885 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
886 static int extended_reg_mentioned_1 (rtx *, void *);
887 static bool ix86_rtx_costs (rtx, int, int, int *);
888 static int min_insn_size (rtx);
890 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
891 static void ix86_svr3_asm_out_constructor (rtx, int);
894 /* Register class used for passing given 64bit part of the argument.
895 These represent classes as documented by the PS ABI, with the exception
896 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
897 use SF or DFmode move instead of DImode to avoid reformatting penalties.
899 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
900 whenever possible (upper half does contain padding).
902 enum x86_64_reg_class
905 X86_64_INTEGER_CLASS,
906 X86_64_INTEGERSI_CLASS,
915 static const char * const x86_64_reg_class_name[] =
916 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
918 #define MAX_CLASSES 4
919 static int classify_argument (enum machine_mode, tree,
920 enum x86_64_reg_class [MAX_CLASSES], int);
921 static int examine_argument (enum machine_mode, tree, int, int *, int *);
922 static rtx construct_container (enum machine_mode, tree, int, int, int,
924 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
925 enum x86_64_reg_class);
927 /* Table of constants used by fldpi, fldln2, etc.... */
928 static REAL_VALUE_TYPE ext_80387_constants_table [5];
929 static bool ext_80387_constants_init = 0;
930 static void init_ext_80387_constants (void);
932 /* Initialize the GCC target structure. */
933 #undef TARGET_ATTRIBUTE_TABLE
934 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
935 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
936 # undef TARGET_MERGE_DECL_ATTRIBUTES
937 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
940 #undef TARGET_COMP_TYPE_ATTRIBUTES
941 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
943 #undef TARGET_INIT_BUILTINS
944 #define TARGET_INIT_BUILTINS ix86_init_builtins
946 #undef TARGET_EXPAND_BUILTIN
947 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
949 #undef TARGET_ASM_FUNCTION_EPILOGUE
950 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
952 #undef TARGET_ASM_OPEN_PAREN
953 #define TARGET_ASM_OPEN_PAREN ""
954 #undef TARGET_ASM_CLOSE_PAREN
955 #define TARGET_ASM_CLOSE_PAREN ""
957 #undef TARGET_ASM_ALIGNED_HI_OP
958 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
959 #undef TARGET_ASM_ALIGNED_SI_OP
960 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
962 #undef TARGET_ASM_ALIGNED_DI_OP
963 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
966 #undef TARGET_ASM_UNALIGNED_HI_OP
967 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
968 #undef TARGET_ASM_UNALIGNED_SI_OP
969 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
970 #undef TARGET_ASM_UNALIGNED_DI_OP
971 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
973 #undef TARGET_SCHED_ADJUST_COST
974 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
975 #undef TARGET_SCHED_ISSUE_RATE
976 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
977 #undef TARGET_SCHED_VARIABLE_ISSUE
978 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
979 #undef TARGET_SCHED_INIT
980 #define TARGET_SCHED_INIT ix86_sched_init
981 #undef TARGET_SCHED_REORDER
982 #define TARGET_SCHED_REORDER ix86_sched_reorder
983 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
984 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
985 ia32_use_dfa_pipeline_interface
986 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
987 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
988 ia32_multipass_dfa_lookahead
990 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
991 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
994 #undef TARGET_HAVE_TLS
995 #define TARGET_HAVE_TLS true
997 #undef TARGET_CANNOT_FORCE_CONST_MEM
998 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1000 #undef TARGET_DELEGITIMIZE_ADDRESS
1001 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1003 #undef TARGET_MS_BITFIELD_LAYOUT_P
1004 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1006 #undef TARGET_ASM_OUTPUT_MI_THUNK
1007 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1008 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1009 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1011 #undef TARGET_ASM_FILE_START
1012 #define TARGET_ASM_FILE_START x86_file_start
1014 #undef TARGET_RTX_COSTS
1015 #define TARGET_RTX_COSTS ix86_rtx_costs
1016 #undef TARGET_ADDRESS_COST
1017 #define TARGET_ADDRESS_COST ix86_address_cost
1019 #undef TARGET_FIXED_CONDITION_CODE_REGS
1020 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1021 #undef TARGET_CC_MODES_COMPATIBLE
1022 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1024 #undef TARGET_MACHINE_DEPENDENT_REORG
1025 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1027 #undef TARGET_BUILD_BUILTIN_VA_LIST
1028 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1030 #undef TARGET_PROMOTE_PROTOTYPES
1031 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1033 #undef TARGET_SETUP_INCOMING_VARARGS
1034 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1036 struct gcc_target targetm = TARGET_INITIALIZER;
1038 /* The svr4 ABI for the i386 says that records and unions are returned
1040 #ifndef DEFAULT_PCC_STRUCT_RETURN
1041 #define DEFAULT_PCC_STRUCT_RETURN 1
1044 /* Sometimes certain combinations of command options do not make
1045 sense on a particular target machine. You can define a macro
1046 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1047 defined, is executed once just after all the command options have
1050 Don't use this macro to turn on various extra optimizations for
1051 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1054 override_options (void)
1057 /* Comes from final.c -- no real reason to change it. */
1058 #define MAX_CODE_ALIGN 16
1062 const struct processor_costs *cost; /* Processor costs */
1063 const int target_enable; /* Target flags to enable. */
1064 const int target_disable; /* Target flags to disable. */
1065 const int align_loop; /* Default alignments. */
1066 const int align_loop_max_skip;
1067 const int align_jump;
1068 const int align_jump_max_skip;
1069 const int align_func;
1071 const processor_target_table[PROCESSOR_max] =
1073 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1074 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1075 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1076 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1077 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1078 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1079 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1080 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1083 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1086 const char *const name; /* processor name or nickname. */
1087 const enum processor_type processor;
1088 const enum pta_flags
1093 PTA_PREFETCH_SSE = 8,
1099 const processor_alias_table[] =
1101 {"i386", PROCESSOR_I386, 0},
1102 {"i486", PROCESSOR_I486, 0},
1103 {"i586", PROCESSOR_PENTIUM, 0},
1104 {"pentium", PROCESSOR_PENTIUM, 0},
1105 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1106 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1107 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1108 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1109 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1110 {"i686", PROCESSOR_PENTIUMPRO, 0},
1111 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1112 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1113 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1114 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1115 PTA_MMX | PTA_PREFETCH_SSE},
1116 {"k6", PROCESSOR_K6, PTA_MMX},
1117 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1118 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1119 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1121 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1122 | PTA_3DNOW | PTA_3DNOW_A},
1123 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1124 | PTA_3DNOW_A | PTA_SSE},
1125 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1126 | PTA_3DNOW_A | PTA_SSE},
1127 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1128 | PTA_3DNOW_A | PTA_SSE},
1129 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1130 | PTA_SSE | PTA_SSE2 },
1131 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1132 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1133 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1134 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1135 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1136 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1137 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1138 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1141 int const pta_size = ARRAY_SIZE (processor_alias_table);
1143 /* Set the default values for switches whose default depends on TARGET_64BIT
1144 in case they weren't overwritten by command line options. */
1147 if (flag_omit_frame_pointer == 2)
1148 flag_omit_frame_pointer = 1;
1149 if (flag_asynchronous_unwind_tables == 2)
1150 flag_asynchronous_unwind_tables = 1;
1151 if (flag_pcc_struct_return == 2)
1152 flag_pcc_struct_return = 0;
1156 if (flag_omit_frame_pointer == 2)
1157 flag_omit_frame_pointer = 0;
1158 if (flag_asynchronous_unwind_tables == 2)
1159 flag_asynchronous_unwind_tables = 0;
1160 if (flag_pcc_struct_return == 2)
1161 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1164 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1165 SUBTARGET_OVERRIDE_OPTIONS;
1168 if (!ix86_tune_string && ix86_arch_string)
1169 ix86_tune_string = ix86_arch_string;
1170 if (!ix86_tune_string)
1171 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1172 if (!ix86_arch_string)
1173 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1175 if (ix86_cmodel_string != 0)
1177 if (!strcmp (ix86_cmodel_string, "small"))
1178 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1180 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1181 else if (!strcmp (ix86_cmodel_string, "32"))
1182 ix86_cmodel = CM_32;
1183 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1184 ix86_cmodel = CM_KERNEL;
1185 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1186 ix86_cmodel = CM_MEDIUM;
1187 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1188 ix86_cmodel = CM_LARGE;
1190 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1194 ix86_cmodel = CM_32;
1196 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1198 if (ix86_asm_string != 0)
1200 if (!strcmp (ix86_asm_string, "intel"))
1201 ix86_asm_dialect = ASM_INTEL;
1202 else if (!strcmp (ix86_asm_string, "att"))
1203 ix86_asm_dialect = ASM_ATT;
1205 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1207 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1208 error ("code model `%s' not supported in the %s bit mode",
1209 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1210 if (ix86_cmodel == CM_LARGE)
1211 sorry ("code model `large' not supported yet");
1212 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1213 sorry ("%i-bit mode not compiled in",
1214 (target_flags & MASK_64BIT) ? 64 : 32);
1216 for (i = 0; i < pta_size; i++)
1217 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1219 ix86_arch = processor_alias_table[i].processor;
1220 /* Default cpu tuning to the architecture. */
1221 ix86_tune = ix86_arch;
1222 if (processor_alias_table[i].flags & PTA_MMX
1223 && !(target_flags_explicit & MASK_MMX))
1224 target_flags |= MASK_MMX;
1225 if (processor_alias_table[i].flags & PTA_3DNOW
1226 && !(target_flags_explicit & MASK_3DNOW))
1227 target_flags |= MASK_3DNOW;
1228 if (processor_alias_table[i].flags & PTA_3DNOW_A
1229 && !(target_flags_explicit & MASK_3DNOW_A))
1230 target_flags |= MASK_3DNOW_A;
1231 if (processor_alias_table[i].flags & PTA_SSE
1232 && !(target_flags_explicit & MASK_SSE))
1233 target_flags |= MASK_SSE;
1234 if (processor_alias_table[i].flags & PTA_SSE2
1235 && !(target_flags_explicit & MASK_SSE2))
1236 target_flags |= MASK_SSE2;
1237 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1238 x86_prefetch_sse = true;
1239 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1240 error ("CPU you selected does not support x86-64 instruction set");
1245 error ("bad value (%s) for -march= switch", ix86_arch_string);
1247 for (i = 0; i < pta_size; i++)
1248 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1250 ix86_tune = processor_alias_table[i].processor;
1251 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1252 error ("CPU you selected does not support x86-64 instruction set");
1255 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1256 x86_prefetch_sse = true;
1258 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1261 ix86_cost = &size_cost;
1263 ix86_cost = processor_target_table[ix86_tune].cost;
1264 target_flags |= processor_target_table[ix86_tune].target_enable;
1265 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1267 /* Arrange to set up i386_stack_locals for all functions. */
1268 init_machine_status = ix86_init_machine_status;
1270 /* Validate -mregparm= value. */
1271 if (ix86_regparm_string)
1273 i = atoi (ix86_regparm_string);
1274 if (i < 0 || i > REGPARM_MAX)
1275 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1281 ix86_regparm = REGPARM_MAX;
1283 /* If the user has provided any of the -malign-* options,
1284 warn and use that value only if -falign-* is not set.
1285 Remove this code in GCC 3.2 or later. */
1286 if (ix86_align_loops_string)
1288 warning ("-malign-loops is obsolete, use -falign-loops");
1289 if (align_loops == 0)
1291 i = atoi (ix86_align_loops_string);
1292 if (i < 0 || i > MAX_CODE_ALIGN)
1293 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1295 align_loops = 1 << i;
1299 if (ix86_align_jumps_string)
1301 warning ("-malign-jumps is obsolete, use -falign-jumps");
1302 if (align_jumps == 0)
1304 i = atoi (ix86_align_jumps_string);
1305 if (i < 0 || i > MAX_CODE_ALIGN)
1306 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1308 align_jumps = 1 << i;
1312 if (ix86_align_funcs_string)
1314 warning ("-malign-functions is obsolete, use -falign-functions");
1315 if (align_functions == 0)
1317 i = atoi (ix86_align_funcs_string);
1318 if (i < 0 || i > MAX_CODE_ALIGN)
1319 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1321 align_functions = 1 << i;
1325 /* Default align_* from the processor table. */
1326 if (align_loops == 0)
1328 align_loops = processor_target_table[ix86_tune].align_loop;
1329 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1331 if (align_jumps == 0)
1333 align_jumps = processor_target_table[ix86_tune].align_jump;
1334 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1336 if (align_functions == 0)
1338 align_functions = processor_target_table[ix86_tune].align_func;
1341 /* Validate -mpreferred-stack-boundary= value, or provide default.
1342 The default of 128 bits is for Pentium III's SSE __m128, but we
1343 don't want additional code to keep the stack aligned when
1344 optimizing for code size. */
1345 ix86_preferred_stack_boundary = (optimize_size
1346 ? TARGET_64BIT ? 128 : 32
1348 if (ix86_preferred_stack_boundary_string)
1350 i = atoi (ix86_preferred_stack_boundary_string);
1351 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1352 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1353 TARGET_64BIT ? 4 : 2);
1355 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1358 /* Validate -mbranch-cost= value, or provide default. */
1359 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1360 if (ix86_branch_cost_string)
1362 i = atoi (ix86_branch_cost_string);
1364 error ("-mbranch-cost=%d is not between 0 and 5", i);
1366 ix86_branch_cost = i;
1369 if (ix86_tls_dialect_string)
1371 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1372 ix86_tls_dialect = TLS_DIALECT_GNU;
1373 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1374 ix86_tls_dialect = TLS_DIALECT_SUN;
1376 error ("bad value (%s) for -mtls-dialect= switch",
1377 ix86_tls_dialect_string);
1380 /* Keep nonleaf frame pointers. */
1381 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1382 flag_omit_frame_pointer = 1;
1384 /* If we're doing fast math, we don't care about comparison order
1385 wrt NaNs. This lets us use a shorter comparison sequence. */
1386 if (flag_unsafe_math_optimizations)
1387 target_flags &= ~MASK_IEEE_FP;
1389 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1390 since the insns won't need emulation. */
1391 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1392 target_flags &= ~MASK_NO_FANCY_MATH_387;
1394 /* Turn on SSE2 builtins for -mpni. */
1396 target_flags |= MASK_SSE2;
1398 /* Turn on SSE builtins for -msse2. */
1400 target_flags |= MASK_SSE;
1404 if (TARGET_ALIGN_DOUBLE)
1405 error ("-malign-double makes no sense in the 64bit mode");
1407 error ("-mrtd calling convention not supported in the 64bit mode");
1408 /* Enable by default the SSE and MMX builtins. */
1409 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1410 ix86_fpmath = FPMATH_SSE;
1414 ix86_fpmath = FPMATH_387;
1415 /* i386 ABI does not specify red zone. It still makes sense to use it
1416 when programmer takes care to stack from being destroyed. */
1417 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1418 target_flags |= MASK_NO_RED_ZONE;
1421 if (ix86_fpmath_string != 0)
1423 if (! strcmp (ix86_fpmath_string, "387"))
1424 ix86_fpmath = FPMATH_387;
1425 else if (! strcmp (ix86_fpmath_string, "sse"))
1429 warning ("SSE instruction set disabled, using 387 arithmetics");
1430 ix86_fpmath = FPMATH_387;
1433 ix86_fpmath = FPMATH_SSE;
1435 else if (! strcmp (ix86_fpmath_string, "387,sse")
1436 || ! strcmp (ix86_fpmath_string, "sse,387"))
1440 warning ("SSE instruction set disabled, using 387 arithmetics");
1441 ix86_fpmath = FPMATH_387;
1443 else if (!TARGET_80387)
1445 warning ("387 instruction set disabled, using SSE arithmetics");
1446 ix86_fpmath = FPMATH_SSE;
1449 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1452 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1455 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1459 target_flags |= MASK_MMX;
1460 x86_prefetch_sse = true;
1463 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1466 target_flags |= MASK_MMX;
1467 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1468 extensions it adds. */
1469 if (x86_3dnow_a & (1 << ix86_arch))
1470 target_flags |= MASK_3DNOW_A;
1472 if ((x86_accumulate_outgoing_args & TUNEMASK)
1473 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1475 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1477 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1480 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1481 p = strchr (internal_label_prefix, 'X');
1482 internal_label_prefix_len = p - internal_label_prefix;
1488 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1490 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1491 make the problem with not enough registers even worse. */
1492 #ifdef INSN_SCHEDULING
1494 flag_schedule_insns = 0;
1497 /* The default values of these switches depend on the TARGET_64BIT
1498 that is not known at this moment. Mark these values with 2 and
1499 let user the to override these. In case there is no command line option
1500 specifying them, we will set the defaults in override_options. */
1502 flag_omit_frame_pointer = 2;
1503 flag_pcc_struct_return = 2;
1504 flag_asynchronous_unwind_tables = 2;
1507 /* Table of valid machine attributes. */
1508 const struct attribute_spec ix86_attribute_table[] =
1510 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1511 /* Stdcall attribute says callee is responsible for popping arguments
1512 if they are not variable. */
1513 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1514 /* Fastcall attribute says callee is responsible for popping arguments
1515 if they are not variable. */
1516 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1517 /* Cdecl attribute says the callee is a normal C declaration */
1518 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1519 /* Regparm attribute specifies how many integer arguments are to be
1520 passed in registers. */
1521 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1522 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1523 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1524 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1525 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1527 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1528 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1529 { NULL, 0, 0, false, false, false, NULL }
1532 /* Decide whether we can make a sibling call to a function. DECL is the
1533 declaration of the function being targeted by the call and EXP is the
1534 CALL_EXPR representing the call. */
1537 ix86_function_ok_for_sibcall (tree decl, tree exp)
1539 /* If we are generating position-independent code, we cannot sibcall
1540 optimize any indirect call, or a direct call to a global function,
1541 as the PLT requires %ebx be live. */
1542 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1545 /* If we are returning floats on the 80387 register stack, we cannot
1546 make a sibcall from a function that doesn't return a float to a
1547 function that does or, conversely, from a function that does return
1548 a float to a function that doesn't; the necessary stack adjustment
1549 would not be executed. */
1550 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1551 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1554 /* If this call is indirect, we'll need to be able to use a call-clobbered
1555 register for the address of the target function. Make sure that all
1556 such registers are not used for passing parameters. */
1557 if (!decl && !TARGET_64BIT)
1561 /* We're looking at the CALL_EXPR, we need the type of the function. */
1562 type = TREE_OPERAND (exp, 0); /* pointer expression */
1563 type = TREE_TYPE (type); /* pointer type */
1564 type = TREE_TYPE (type); /* function type */
1566 if (ix86_function_regparm (type, NULL) >= 3)
1568 /* ??? Need to count the actual number of registers to be used,
1569 not the possible number of registers. Fix later. */
1574 /* Otherwise okay. That also includes certain types of indirect calls. */
1578 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1579 arguments as in struct attribute_spec.handler. */
1581 ix86_handle_cdecl_attribute (tree *node, tree name,
1582 tree args ATTRIBUTE_UNUSED,
1583 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1585 if (TREE_CODE (*node) != FUNCTION_TYPE
1586 && TREE_CODE (*node) != METHOD_TYPE
1587 && TREE_CODE (*node) != FIELD_DECL
1588 && TREE_CODE (*node) != TYPE_DECL)
1590 warning ("`%s' attribute only applies to functions",
1591 IDENTIFIER_POINTER (name));
1592 *no_add_attrs = true;
1596 if (is_attribute_p ("fastcall", name))
1598 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1600 error ("fastcall and stdcall attributes are not compatible");
1602 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1604 error ("fastcall and regparm attributes are not compatible");
1607 else if (is_attribute_p ("stdcall", name))
1609 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1611 error ("fastcall and stdcall attributes are not compatible");
1618 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1619 *no_add_attrs = true;
1625 /* Handle a "regparm" attribute;
1626 arguments as in struct attribute_spec.handler. */
1628 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1629 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1631 if (TREE_CODE (*node) != FUNCTION_TYPE
1632 && TREE_CODE (*node) != METHOD_TYPE
1633 && TREE_CODE (*node) != FIELD_DECL
1634 && TREE_CODE (*node) != TYPE_DECL)
1636 warning ("`%s' attribute only applies to functions",
1637 IDENTIFIER_POINTER (name));
1638 *no_add_attrs = true;
1644 cst = TREE_VALUE (args);
1645 if (TREE_CODE (cst) != INTEGER_CST)
1647 warning ("`%s' attribute requires an integer constant argument",
1648 IDENTIFIER_POINTER (name));
1649 *no_add_attrs = true;
1651 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1653 warning ("argument to `%s' attribute larger than %d",
1654 IDENTIFIER_POINTER (name), REGPARM_MAX);
1655 *no_add_attrs = true;
1658 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1660 error ("fastcall and regparm attributes are not compatible");
1667 /* Return 0 if the attributes for two types are incompatible, 1 if they
1668 are compatible, and 2 if they are nearly compatible (which causes a
1669 warning to be generated). */
1672 ix86_comp_type_attributes (tree type1, tree type2)
1674 /* Check for mismatch of non-default calling convention. */
1675 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1677 if (TREE_CODE (type1) != FUNCTION_TYPE)
1680 /* Check for mismatched fastcall types */
1681 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1682 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1685 /* Check for mismatched return types (cdecl vs stdcall). */
1686 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1687 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1692 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1693 DECL may be NULL when calling function indirectly
1694 or considering a libcall. */
1697 ix86_function_regparm (tree type, tree decl)
1700 int regparm = ix86_regparm;
1701 bool user_convention = false;
1705 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1708 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1709 user_convention = true;
1712 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1715 user_convention = true;
1718 /* Use register calling convention for local functions when possible. */
1719 if (!TARGET_64BIT && !user_convention && decl
1720 && flag_unit_at_a_time && !profile_flag)
1722 struct cgraph_local_info *i = cgraph_local_info (decl);
1725 /* We can't use regparm(3) for nested functions as these use
1726 static chain pointer in third argument. */
1727 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1737 /* Return true if EAX is live at the start of the function. Used by
1738 ix86_expand_prologue to determine if we need special help before
1739 calling allocate_stack_worker. */
1742 ix86_eax_live_at_start_p (void)
1744 /* Cheat. Don't bother working forward from ix86_function_regparm
1745 to the function type to whether an actual argument is located in
1746 eax. Instead just look at cfg info, which is still close enough
1747 to correct at this point. This gives false positives for broken
1748 functions that might use uninitialized data that happens to be
1749 allocated in eax, but who cares? */
1750 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1753 /* Value is the number of bytes of arguments automatically
1754 popped when returning from a subroutine call.
1755 FUNDECL is the declaration node of the function (as a tree),
1756 FUNTYPE is the data type of the function (as a tree),
1757 or for a library call it is an identifier node for the subroutine name.
1758 SIZE is the number of bytes of arguments passed on the stack.
1760 On the 80386, the RTD insn may be used to pop them if the number
1761 of args is fixed, but if the number is variable then the caller
1762 must pop them all. RTD can't be used for library calls now
1763 because the library is compiled with the Unix compiler.
1764 Use of RTD is a selectable option, since it is incompatible with
1765 standard Unix calling sequences. If the option is not selected,
1766 the caller must always pop the args.
1768 The attribute stdcall is equivalent to RTD on a per module basis. */
1771 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1773 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1775 /* Cdecl functions override -mrtd, and never pop the stack. */
1776 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1778 /* Stdcall and fastcall functions will pop the stack if not
1780 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1781 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1785 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1786 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1787 == void_type_node)))
1791 /* Lose any fake structure return argument if it is passed on the stack. */
1792 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1795 int nregs = ix86_function_regparm (funtype, fundecl);
1798 return GET_MODE_SIZE (Pmode);
1804 /* Argument support functions. */
1806 /* Return true when register may be used to pass function parameters. */
1808 ix86_function_arg_regno_p (int regno)
1812 return (regno < REGPARM_MAX
1813 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1814 if (SSE_REGNO_P (regno) && TARGET_SSE)
1816 /* RAX is used as hidden argument to va_arg functions. */
1819 for (i = 0; i < REGPARM_MAX; i++)
1820 if (regno == x86_64_int_parameter_registers[i])
1825 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1826 for a call to a function whose data type is FNTYPE.
1827 For a library call, FNTYPE is 0. */
1830 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1831 tree fntype, /* tree ptr for function decl */
1832 rtx libname, /* SYMBOL_REF of library name or 0 */
1835 static CUMULATIVE_ARGS zero_cum;
1836 tree param, next_param;
1838 if (TARGET_DEBUG_ARG)
1840 fprintf (stderr, "\ninit_cumulative_args (");
1842 fprintf (stderr, "fntype code = %s, ret code = %s",
1843 tree_code_name[(int) TREE_CODE (fntype)],
1844 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1846 fprintf (stderr, "no fntype");
1849 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1854 /* Set up the number of registers to use for passing arguments. */
1856 cum->nregs = ix86_function_regparm (fntype, fndecl);
1858 cum->nregs = ix86_regparm;
1859 cum->sse_nregs = SSE_REGPARM_MAX;
1860 cum->mmx_nregs = MMX_REGPARM_MAX;
1861 cum->warn_sse = true;
1862 cum->warn_mmx = true;
1863 cum->maybe_vaarg = false;
1865 /* Use ecx and edx registers if function has fastcall attribute */
1866 if (fntype && !TARGET_64BIT)
1868 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1876 /* Determine if this function has variable arguments. This is
1877 indicated by the last argument being 'void_type_mode' if there
1878 are no variable arguments. If there are variable arguments, then
1879 we won't pass anything in registers */
1881 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1883 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1884 param != 0; param = next_param)
1886 next_param = TREE_CHAIN (param);
1887 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1898 cum->maybe_vaarg = true;
1902 if ((!fntype && !libname)
1903 || (fntype && !TYPE_ARG_TYPES (fntype)))
1904 cum->maybe_vaarg = 1;
1906 if (TARGET_DEBUG_ARG)
1907 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1912 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1913 of this code is to classify each 8bytes of incoming argument by the register
1914 class and assign registers accordingly. */
1916 /* Return the union class of CLASS1 and CLASS2.
1917 See the x86-64 PS ABI for details. */
1919 static enum x86_64_reg_class
1920 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1922 /* Rule #1: If both classes are equal, this is the resulting class. */
1923 if (class1 == class2)
1926 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1928 if (class1 == X86_64_NO_CLASS)
1930 if (class2 == X86_64_NO_CLASS)
1933 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1934 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1935 return X86_64_MEMORY_CLASS;
1937 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1938 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1939 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1940 return X86_64_INTEGERSI_CLASS;
1941 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1942 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1943 return X86_64_INTEGER_CLASS;
1945 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1946 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1947 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1948 return X86_64_MEMORY_CLASS;
1950 /* Rule #6: Otherwise class SSE is used. */
1951 return X86_64_SSE_CLASS;
1954 /* Classify the argument of type TYPE and mode MODE.
1955 CLASSES will be filled by the register class used to pass each word
1956 of the operand. The number of words is returned. In case the parameter
1957 should be passed in memory, 0 is returned. As a special case for zero
1958 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1960 BIT_OFFSET is used internally for handling records and specifies offset
1961 of the offset in bits modulo 256 to avoid overflow cases.
1963 See the x86-64 PS ABI for details.
1967 classify_argument (enum machine_mode mode, tree type,
1968 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1970 HOST_WIDE_INT bytes =
1971 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1972 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1974 /* Variable sized entities are always passed/returned in memory. */
1978 if (mode != VOIDmode
1979 && MUST_PASS_IN_STACK (mode, type))
1982 if (type && AGGREGATE_TYPE_P (type))
1986 enum x86_64_reg_class subclasses[MAX_CLASSES];
1988 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1992 for (i = 0; i < words; i++)
1993 classes[i] = X86_64_NO_CLASS;
1995 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1996 signalize memory class, so handle it as special case. */
1999 classes[0] = X86_64_NO_CLASS;
2003 /* Classify each field of record and merge classes. */
2004 if (TREE_CODE (type) == RECORD_TYPE)
2006 /* For classes first merge in the field of the subclasses. */
2007 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2009 tree bases = TYPE_BINFO_BASETYPES (type);
2010 int n_bases = TREE_VEC_LENGTH (bases);
2013 for (i = 0; i < n_bases; ++i)
2015 tree binfo = TREE_VEC_ELT (bases, i);
2017 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2018 tree type = BINFO_TYPE (binfo);
2020 num = classify_argument (TYPE_MODE (type),
2022 (offset + bit_offset) % 256);
2025 for (i = 0; i < num; i++)
2027 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2029 merge_classes (subclasses[i], classes[i + pos]);
2033 /* And now merge the fields of structure. */
2034 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2036 if (TREE_CODE (field) == FIELD_DECL)
2040 /* Bitfields are always classified as integer. Handle them
2041 early, since later code would consider them to be
2042 misaligned integers. */
2043 if (DECL_BIT_FIELD (field))
2045 for (i = int_bit_position (field) / 8 / 8;
2046 i < (int_bit_position (field)
2047 + tree_low_cst (DECL_SIZE (field), 0)
2050 merge_classes (X86_64_INTEGER_CLASS,
2055 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2056 TREE_TYPE (field), subclasses,
2057 (int_bit_position (field)
2058 + bit_offset) % 256);
2061 for (i = 0; i < num; i++)
2064 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2066 merge_classes (subclasses[i], classes[i + pos]);
2072 /* Arrays are handled as small records. */
2073 else if (TREE_CODE (type) == ARRAY_TYPE)
2076 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2077 TREE_TYPE (type), subclasses, bit_offset);
2081 /* The partial classes are now full classes. */
2082 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2083 subclasses[0] = X86_64_SSE_CLASS;
2084 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2085 subclasses[0] = X86_64_INTEGER_CLASS;
2087 for (i = 0; i < words; i++)
2088 classes[i] = subclasses[i % num];
2090 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2091 else if (TREE_CODE (type) == UNION_TYPE
2092 || TREE_CODE (type) == QUAL_UNION_TYPE)
2094 /* For classes first merge in the field of the subclasses. */
2095 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2097 tree bases = TYPE_BINFO_BASETYPES (type);
2098 int n_bases = TREE_VEC_LENGTH (bases);
2101 for (i = 0; i < n_bases; ++i)
2103 tree binfo = TREE_VEC_ELT (bases, i);
2105 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2106 tree type = BINFO_TYPE (binfo);
2108 num = classify_argument (TYPE_MODE (type),
2110 (offset + (bit_offset % 64)) % 256);
2113 for (i = 0; i < num; i++)
2115 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2117 merge_classes (subclasses[i], classes[i + pos]);
2121 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2123 if (TREE_CODE (field) == FIELD_DECL)
2126 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2127 TREE_TYPE (field), subclasses,
2131 for (i = 0; i < num; i++)
2132 classes[i] = merge_classes (subclasses[i], classes[i]);
2136 else if (TREE_CODE (type) == SET_TYPE)
2140 classes[0] = X86_64_INTEGERSI_CLASS;
2143 else if (bytes <= 8)
2145 classes[0] = X86_64_INTEGER_CLASS;
2148 else if (bytes <= 12)
2150 classes[0] = X86_64_INTEGER_CLASS;
2151 classes[1] = X86_64_INTEGERSI_CLASS;
2156 classes[0] = X86_64_INTEGER_CLASS;
2157 classes[1] = X86_64_INTEGER_CLASS;
2164 /* Final merger cleanup. */
2165 for (i = 0; i < words; i++)
2167 /* If one class is MEMORY, everything should be passed in
2169 if (classes[i] == X86_64_MEMORY_CLASS)
2172 /* The X86_64_SSEUP_CLASS should be always preceded by
2173 X86_64_SSE_CLASS. */
2174 if (classes[i] == X86_64_SSEUP_CLASS
2175 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2176 classes[i] = X86_64_SSE_CLASS;
2178 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2179 if (classes[i] == X86_64_X87UP_CLASS
2180 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2181 classes[i] = X86_64_SSE_CLASS;
2186 /* Compute alignment needed. We align all types to natural boundaries with
2187 exception of XFmode that is aligned to 64bits. */
2188 if (mode != VOIDmode && mode != BLKmode)
2190 int mode_alignment = GET_MODE_BITSIZE (mode);
2193 mode_alignment = 128;
2194 else if (mode == XCmode)
2195 mode_alignment = 256;
2196 /* Misaligned fields are always returned in memory. */
2197 if (bit_offset % mode_alignment)
2201 /* Classification of atomic types. */
2211 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2212 classes[0] = X86_64_INTEGERSI_CLASS;
2214 classes[0] = X86_64_INTEGER_CLASS;
2218 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2221 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2222 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2225 if (!(bit_offset % 64))
2226 classes[0] = X86_64_SSESF_CLASS;
2228 classes[0] = X86_64_SSE_CLASS;
2231 classes[0] = X86_64_SSEDF_CLASS;
2234 classes[0] = X86_64_X87_CLASS;
2235 classes[1] = X86_64_X87UP_CLASS;
2241 classes[0] = X86_64_X87_CLASS;
2242 classes[1] = X86_64_X87UP_CLASS;
2243 classes[2] = X86_64_X87_CLASS;
2244 classes[3] = X86_64_X87UP_CLASS;
2247 classes[0] = X86_64_SSEDF_CLASS;
2248 classes[1] = X86_64_SSEDF_CLASS;
2251 classes[0] = X86_64_SSE_CLASS;
2259 classes[0] = X86_64_SSE_CLASS;
2260 classes[1] = X86_64_SSEUP_CLASS;
2275 /* Examine the argument and return set number of register required in each
2276 class. Return 0 iff parameter should be passed in memory. */
2278 examine_argument (enum machine_mode mode, tree type, int in_return,
2279 int *int_nregs, int *sse_nregs)
2281 enum x86_64_reg_class class[MAX_CLASSES];
2282 int n = classify_argument (mode, type, class, 0);
2288 for (n--; n >= 0; n--)
2291 case X86_64_INTEGER_CLASS:
2292 case X86_64_INTEGERSI_CLASS:
2295 case X86_64_SSE_CLASS:
2296 case X86_64_SSESF_CLASS:
2297 case X86_64_SSEDF_CLASS:
2300 case X86_64_NO_CLASS:
2301 case X86_64_SSEUP_CLASS:
2303 case X86_64_X87_CLASS:
2304 case X86_64_X87UP_CLASS:
2308 case X86_64_MEMORY_CLASS:
2313 /* Construct container for the argument used by GCC interface. See
2314 FUNCTION_ARG for the detailed description. */
2316 construct_container (enum machine_mode mode, tree type, int in_return,
2317 int nintregs, int nsseregs, const int * intreg,
2320 enum machine_mode tmpmode;
2322 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2323 enum x86_64_reg_class class[MAX_CLASSES];
2327 int needed_sseregs, needed_intregs;
2328 rtx exp[MAX_CLASSES];
2331 n = classify_argument (mode, type, class, 0);
2332 if (TARGET_DEBUG_ARG)
2335 fprintf (stderr, "Memory class\n");
2338 fprintf (stderr, "Classes:");
2339 for (i = 0; i < n; i++)
2341 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2343 fprintf (stderr, "\n");
2348 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2350 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2353 /* First construct simple cases. Avoid SCmode, since we want to use
2354 single register to pass this type. */
2355 if (n == 1 && mode != SCmode)
2358 case X86_64_INTEGER_CLASS:
2359 case X86_64_INTEGERSI_CLASS:
2360 return gen_rtx_REG (mode, intreg[0]);
2361 case X86_64_SSE_CLASS:
2362 case X86_64_SSESF_CLASS:
2363 case X86_64_SSEDF_CLASS:
2364 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2365 case X86_64_X87_CLASS:
2366 return gen_rtx_REG (mode, FIRST_STACK_REG);
2367 case X86_64_NO_CLASS:
2368 /* Zero sized array, struct or class. */
2373 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2374 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2376 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2377 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2378 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2379 && class[1] == X86_64_INTEGER_CLASS
2380 && (mode == CDImode || mode == TImode || mode == TFmode)
2381 && intreg[0] + 1 == intreg[1])
2382 return gen_rtx_REG (mode, intreg[0]);
2384 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2385 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2386 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2388 /* Otherwise figure out the entries of the PARALLEL. */
2389 for (i = 0; i < n; i++)
2393 case X86_64_NO_CLASS:
2395 case X86_64_INTEGER_CLASS:
2396 case X86_64_INTEGERSI_CLASS:
2397 /* Merge TImodes on aligned occasions here too. */
2398 if (i * 8 + 8 > bytes)
2399 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2400 else if (class[i] == X86_64_INTEGERSI_CLASS)
2404 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2405 if (tmpmode == BLKmode)
2407 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2408 gen_rtx_REG (tmpmode, *intreg),
2412 case X86_64_SSESF_CLASS:
2413 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2414 gen_rtx_REG (SFmode,
2415 SSE_REGNO (sse_regno)),
2419 case X86_64_SSEDF_CLASS:
2420 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2421 gen_rtx_REG (DFmode,
2422 SSE_REGNO (sse_regno)),
2426 case X86_64_SSE_CLASS:
2427 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2431 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2432 gen_rtx_REG (tmpmode,
2433 SSE_REGNO (sse_regno)),
2435 if (tmpmode == TImode)
2443 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2444 for (i = 0; i < nexps; i++)
2445 XVECEXP (ret, 0, i) = exp [i];
2449 /* Update the data in CUM to advance over an argument
2450 of mode MODE and data type TYPE.
2451 (TYPE is null for libcalls where that information may not be available.) */
2454 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2455 enum machine_mode mode, /* current arg mode */
2456 tree type, /* type of the argument or 0 if lib support */
2457 int named) /* whether or not the argument was named */
2460 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2461 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2463 if (TARGET_DEBUG_ARG)
2465 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2466 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2469 int int_nregs, sse_nregs;
2470 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2471 cum->words += words;
2472 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2474 cum->nregs -= int_nregs;
2475 cum->sse_nregs -= sse_nregs;
2476 cum->regno += int_nregs;
2477 cum->sse_regno += sse_nregs;
2480 cum->words += words;
2484 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2485 && (!type || !AGGREGATE_TYPE_P (type)))
2487 cum->sse_words += words;
2488 cum->sse_nregs -= 1;
2489 cum->sse_regno += 1;
2490 if (cum->sse_nregs <= 0)
2496 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2497 && (!type || !AGGREGATE_TYPE_P (type)))
2499 cum->mmx_words += words;
2500 cum->mmx_nregs -= 1;
2501 cum->mmx_regno += 1;
2502 if (cum->mmx_nregs <= 0)
2510 cum->words += words;
2511 cum->nregs -= words;
2512 cum->regno += words;
2514 if (cum->nregs <= 0)
2524 /* Define where to put the arguments to a function.
2525 Value is zero to push the argument on the stack,
2526 or a hard register in which to store the argument.
2528 MODE is the argument's machine mode.
2529 TYPE is the data type of the argument (as a tree).
2530 This is null for libcalls where that information may
2532 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2533 the preceding args and about the function being called.
2534 NAMED is nonzero if this argument is a named parameter
2535 (otherwise it is an extra parameter matching an ellipsis). */
2538 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2539 enum machine_mode mode, /* current arg mode */
2540 tree type, /* type of the argument or 0 if lib support */
2541 int named) /* != 0 for normal args, == 0 for ... args */
2545 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2546 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2547 static bool warnedsse, warnedmmx;
2549 /* Handle a hidden AL argument containing number of registers for varargs
2550 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2552 if (mode == VOIDmode)
2555 return GEN_INT (cum->maybe_vaarg
2556 ? (cum->sse_nregs < 0
2564 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2565 &x86_64_int_parameter_registers [cum->regno],
2570 /* For now, pass fp/complex values on the stack. */
2582 if (words <= cum->nregs)
2584 int regno = cum->regno;
2586 /* Fastcall allocates the first two DWORD (SImode) or
2587 smaller arguments to ECX and EDX. */
2590 if (mode == BLKmode || mode == DImode)
2593 /* ECX not EAX is the first allocated register. */
2597 ret = gen_rtx_REG (mode, regno);
2607 if (!type || !AGGREGATE_TYPE_P (type))
2609 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2612 warning ("SSE vector argument without SSE enabled "
2616 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2623 if (!type || !AGGREGATE_TYPE_P (type))
2625 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2628 warning ("MMX vector argument without MMX enabled "
2632 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2637 if (TARGET_DEBUG_ARG)
2640 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2641 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2644 print_simple_rtl (stderr, ret);
2646 fprintf (stderr, ", stack");
2648 fprintf (stderr, " )\n");
2654 /* A C expression that indicates when an argument must be passed by
2655 reference. If nonzero for an argument, a copy of that argument is
2656 made in memory and a pointer to the argument is passed instead of
2657 the argument itself. The pointer is passed in whatever way is
2658 appropriate for passing a pointer to that type. */
2661 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2662 enum machine_mode mode ATTRIBUTE_UNUSED,
2663 tree type, int named ATTRIBUTE_UNUSED)
2668 if (type && int_size_in_bytes (type) == -1)
2670 if (TARGET_DEBUG_ARG)
2671 fprintf (stderr, "function_arg_pass_by_reference\n");
2678 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2681 contains_128bit_aligned_vector_p (tree type)
2683 enum machine_mode mode = TYPE_MODE (type);
2684 if (SSE_REG_MODE_P (mode)
2685 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2687 if (TYPE_ALIGN (type) < 128)
2690 if (AGGREGATE_TYPE_P (type))
2692 /* Walk the aggregates recursively. */
2693 if (TREE_CODE (type) == RECORD_TYPE
2694 || TREE_CODE (type) == UNION_TYPE
2695 || TREE_CODE (type) == QUAL_UNION_TYPE)
2699 if (TYPE_BINFO (type) != NULL
2700 && TYPE_BINFO_BASETYPES (type) != NULL)
2702 tree bases = TYPE_BINFO_BASETYPES (type);
2703 int n_bases = TREE_VEC_LENGTH (bases);
2706 for (i = 0; i < n_bases; ++i)
2708 tree binfo = TREE_VEC_ELT (bases, i);
2709 tree type = BINFO_TYPE (binfo);
2711 if (contains_128bit_aligned_vector_p (type))
2715 /* And now merge the fields of structure. */
2716 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2718 if (TREE_CODE (field) == FIELD_DECL
2719 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2723 /* Just for use if some languages passes arrays by value. */
2724 else if (TREE_CODE (type) == ARRAY_TYPE)
2726 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2735 /* Gives the alignment boundary, in bits, of an argument with the
2736 specified mode and type. */
2739 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2743 align = TYPE_ALIGN (type);
2745 align = GET_MODE_ALIGNMENT (mode);
2746 if (align < PARM_BOUNDARY)
2747 align = PARM_BOUNDARY;
2750 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2751 make an exception for SSE modes since these require 128bit
2754 The handling here differs from field_alignment. ICC aligns MMX
2755 arguments to 4 byte boundaries, while structure fields are aligned
2756 to 8 byte boundaries. */
2759 if (!SSE_REG_MODE_P (mode))
2760 align = PARM_BOUNDARY;
2764 if (!contains_128bit_aligned_vector_p (type))
2765 align = PARM_BOUNDARY;
2773 /* Return true if N is a possible register number of function value. */
2775 ix86_function_value_regno_p (int regno)
2779 return ((regno) == 0
2780 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2781 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2783 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2784 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2785 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2788 /* Define how to find the value returned by a function.
2789 VALTYPE is the data type of the value (as a tree).
2790 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2791 otherwise, FUNC is 0. */
2793 ix86_function_value (tree valtype)
2797 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2798 REGPARM_MAX, SSE_REGPARM_MAX,
2799 x86_64_int_return_registers, 0);
2800 /* For zero sized structures, construct_container return NULL, but we need
2801 to keep rest of compiler happy by returning meaningful value. */
2803 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2807 return gen_rtx_REG (TYPE_MODE (valtype),
2808 ix86_value_regno (TYPE_MODE (valtype)));
2811 /* Return false iff type is returned in memory. */
2813 ix86_return_in_memory (tree type)
2815 int needed_intregs, needed_sseregs, size;
2816 enum machine_mode mode = TYPE_MODE (type);
2819 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2821 if (mode == BLKmode)
2824 size = int_size_in_bytes (type);
2826 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2829 if (VECTOR_MODE_P (mode) || mode == TImode)
2831 /* User-created vectors small enough to fit in EAX. */
2835 /* MMX/3dNow values are returned on the stack, since we've
2836 got to EMMS/FEMMS before returning. */
2840 /* SSE values are returned in XMM0. */
2841 /* ??? Except when it doesn't exist? We have a choice of
2842 either (1) being abi incompatible with a -march switch,
2843 or (2) generating an error here. Given no good solution,
2844 I think the safest thing is one warning. The user won't
2845 be able to use -Werror, but.... */
2856 warning ("SSE vector return without SSE enabled "
2871 /* Define how to find the value returned by a library function
2872 assuming the value has mode MODE. */
2874 ix86_libcall_value (enum machine_mode mode)
2884 return gen_rtx_REG (mode, FIRST_SSE_REG);
2887 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2892 return gen_rtx_REG (mode, 0);
2896 return gen_rtx_REG (mode, ix86_value_regno (mode));
2899 /* Given a mode, return the register to use for a return value. */
2902 ix86_value_regno (enum machine_mode mode)
2904 /* Floating point return values in %st(0). */
2905 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2906 return FIRST_FLOAT_REG;
2907 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2908 we prevent this case when sse is not available. */
2909 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2910 return FIRST_SSE_REG;
2911 /* Everything else in %eax. */
2915 /* Create the va_list data type. */
2918 ix86_build_builtin_va_list (void)
2920 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2922 /* For i386 we use plain pointer to argument area. */
2924 return build_pointer_type (char_type_node);
2926 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2927 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2929 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2930 unsigned_type_node);
2931 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2932 unsigned_type_node);
2933 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2935 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2938 DECL_FIELD_CONTEXT (f_gpr) = record;
2939 DECL_FIELD_CONTEXT (f_fpr) = record;
2940 DECL_FIELD_CONTEXT (f_ovf) = record;
2941 DECL_FIELD_CONTEXT (f_sav) = record;
2943 TREE_CHAIN (record) = type_decl;
2944 TYPE_NAME (record) = type_decl;
2945 TYPE_FIELDS (record) = f_gpr;
2946 TREE_CHAIN (f_gpr) = f_fpr;
2947 TREE_CHAIN (f_fpr) = f_ovf;
2948 TREE_CHAIN (f_ovf) = f_sav;
2950 layout_type (record);
2952 /* The correct type is an array type of one element. */
2953 return build_array_type (record, build_index_type (size_zero_node));
2956 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
2959 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2960 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2963 CUMULATIVE_ARGS next_cum;
2964 rtx save_area = NULL_RTX, mem;
2977 /* Indicate to allocate space on the stack for varargs save area. */
2978 ix86_save_varrargs_registers = 1;
2980 cfun->stack_alignment_needed = 128;
2982 fntype = TREE_TYPE (current_function_decl);
2983 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2984 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2985 != void_type_node));
2987 /* For varargs, we do not want to skip the dummy va_dcl argument.
2988 For stdargs, we do want to skip the last named argument. */
2991 function_arg_advance (&next_cum, mode, type, 1);
2994 save_area = frame_pointer_rtx;
2996 set = get_varargs_alias_set ();
2998 for (i = next_cum.regno; i < ix86_regparm; i++)
3000 mem = gen_rtx_MEM (Pmode,
3001 plus_constant (save_area, i * UNITS_PER_WORD));
3002 set_mem_alias_set (mem, set);
3003 emit_move_insn (mem, gen_rtx_REG (Pmode,
3004 x86_64_int_parameter_registers[i]));
3007 if (next_cum.sse_nregs)
3009 /* Now emit code to save SSE registers. The AX parameter contains number
3010 of SSE parameter registers used to call this function. We use
3011 sse_prologue_save insn template that produces computed jump across
3012 SSE saves. We need some preparation work to get this working. */
3014 label = gen_label_rtx ();
3015 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3017 /* Compute address to jump to :
3018 label - 5*eax + nnamed_sse_arguments*5 */
3019 tmp_reg = gen_reg_rtx (Pmode);
3020 nsse_reg = gen_reg_rtx (Pmode);
3021 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3022 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3023 gen_rtx_MULT (Pmode, nsse_reg,
3025 if (next_cum.sse_regno)
3028 gen_rtx_CONST (DImode,
3029 gen_rtx_PLUS (DImode,
3031 GEN_INT (next_cum.sse_regno * 4))));
3033 emit_move_insn (nsse_reg, label_ref);
3034 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3036 /* Compute address of memory block we save into. We always use pointer
3037 pointing 127 bytes after first byte to store - this is needed to keep
3038 instruction size limited by 4 bytes. */
3039 tmp_reg = gen_reg_rtx (Pmode);
3040 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3041 plus_constant (save_area,
3042 8 * REGPARM_MAX + 127)));
3043 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3044 set_mem_alias_set (mem, set);
3045 set_mem_align (mem, BITS_PER_WORD);
3047 /* And finally do the dirty job! */
3048 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3049 GEN_INT (next_cum.sse_regno), label));
3054 /* Implement va_start. */
3057 ix86_va_start (tree valist, rtx nextarg)
3059 HOST_WIDE_INT words, n_gpr, n_fpr;
3060 tree f_gpr, f_fpr, f_ovf, f_sav;
3061 tree gpr, fpr, ovf, sav, t;
3063 /* Only 64bit target needs something special. */
3066 std_expand_builtin_va_start (valist, nextarg);
3070 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3071 f_fpr = TREE_CHAIN (f_gpr);
3072 f_ovf = TREE_CHAIN (f_fpr);
3073 f_sav = TREE_CHAIN (f_ovf);
3075 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3076 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3077 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3078 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3079 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3081 /* Count number of gp and fp argument registers used. */
3082 words = current_function_args_info.words;
3083 n_gpr = current_function_args_info.regno;
3084 n_fpr = current_function_args_info.sse_regno;
3086 if (TARGET_DEBUG_ARG)
3087 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3088 (int) words, (int) n_gpr, (int) n_fpr);
3090 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3091 build_int_2 (n_gpr * 8, 0));
3092 TREE_SIDE_EFFECTS (t) = 1;
3093 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3095 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3096 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3097 TREE_SIDE_EFFECTS (t) = 1;
3098 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3100 /* Find the overflow area. */
3101 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3103 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3104 build_int_2 (words * UNITS_PER_WORD, 0));
3105 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3106 TREE_SIDE_EFFECTS (t) = 1;
3107 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3109 /* Find the register save area.
3110 Prologue of the function save it right above stack frame. */
3111 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3112 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3113 TREE_SIDE_EFFECTS (t) = 1;
3114 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3117 /* Implement va_arg. */
3119 ix86_va_arg (tree valist, tree type)
3121 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3122 tree f_gpr, f_fpr, f_ovf, f_sav;
3123 tree gpr, fpr, ovf, sav, t;
3125 rtx lab_false, lab_over = NULL_RTX;
3130 /* Only 64bit target needs something special. */
3133 return std_expand_builtin_va_arg (valist, type);
3136 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3137 f_fpr = TREE_CHAIN (f_gpr);
3138 f_ovf = TREE_CHAIN (f_fpr);
3139 f_sav = TREE_CHAIN (f_ovf);
3141 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3142 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3143 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3144 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3145 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3147 size = int_size_in_bytes (type);
3150 /* Passed by reference. */
3152 type = build_pointer_type (type);
3153 size = int_size_in_bytes (type);
3155 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3157 container = construct_container (TYPE_MODE (type), type, 0,
3158 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3160 * Pull the value out of the saved registers ...
3163 addr_rtx = gen_reg_rtx (Pmode);
3167 rtx int_addr_rtx, sse_addr_rtx;
3168 int needed_intregs, needed_sseregs;
3171 lab_over = gen_label_rtx ();
3172 lab_false = gen_label_rtx ();
3174 examine_argument (TYPE_MODE (type), type, 0,
3175 &needed_intregs, &needed_sseregs);
3178 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3179 || TYPE_ALIGN (type) > 128);
3181 /* In case we are passing structure, verify that it is consecutive block
3182 on the register save area. If not we need to do moves. */
3183 if (!need_temp && !REG_P (container))
3185 /* Verify that all registers are strictly consecutive */
3186 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3190 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3192 rtx slot = XVECEXP (container, 0, i);
3193 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3194 || INTVAL (XEXP (slot, 1)) != i * 16)
3202 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3204 rtx slot = XVECEXP (container, 0, i);
3205 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3206 || INTVAL (XEXP (slot, 1)) != i * 8)
3213 int_addr_rtx = addr_rtx;
3214 sse_addr_rtx = addr_rtx;
3218 int_addr_rtx = gen_reg_rtx (Pmode);
3219 sse_addr_rtx = gen_reg_rtx (Pmode);
3221 /* First ensure that we fit completely in registers. */
3224 emit_cmp_and_jump_insns (expand_expr
3225 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3226 GEN_INT ((REGPARM_MAX - needed_intregs +
3227 1) * 8), GE, const1_rtx, SImode,
3232 emit_cmp_and_jump_insns (expand_expr
3233 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3234 GEN_INT ((SSE_REGPARM_MAX -
3235 needed_sseregs + 1) * 16 +
3236 REGPARM_MAX * 8), GE, const1_rtx,
3237 SImode, 1, lab_false);
3240 /* Compute index to start of area used for integer regs. */
3243 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3244 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3245 if (r != int_addr_rtx)
3246 emit_move_insn (int_addr_rtx, r);
3250 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3251 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3252 if (r != sse_addr_rtx)
3253 emit_move_insn (sse_addr_rtx, r);
3261 /* Never use the memory itself, as it has the alias set. */
3262 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3263 mem = gen_rtx_MEM (BLKmode, x);
3264 force_operand (x, addr_rtx);
3265 set_mem_alias_set (mem, get_varargs_alias_set ());
3266 set_mem_align (mem, BITS_PER_UNIT);
3268 for (i = 0; i < XVECLEN (container, 0); i++)
3270 rtx slot = XVECEXP (container, 0, i);
3271 rtx reg = XEXP (slot, 0);
3272 enum machine_mode mode = GET_MODE (reg);
3278 if (SSE_REGNO_P (REGNO (reg)))
3280 src_addr = sse_addr_rtx;
3281 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3285 src_addr = int_addr_rtx;
3286 src_offset = REGNO (reg) * 8;
3288 src_mem = gen_rtx_MEM (mode, src_addr);
3289 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3290 src_mem = adjust_address (src_mem, mode, src_offset);
3291 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3292 emit_move_insn (dest_mem, src_mem);
3299 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3300 build_int_2 (needed_intregs * 8, 0));
3301 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3302 TREE_SIDE_EFFECTS (t) = 1;
3303 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3308 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3309 build_int_2 (needed_sseregs * 16, 0));
3310 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3311 TREE_SIDE_EFFECTS (t) = 1;
3312 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3315 emit_jump_insn (gen_jump (lab_over));
3317 emit_label (lab_false);
3320 /* ... otherwise out of the overflow area. */
3322 /* Care for on-stack alignment if needed. */
3323 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3327 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3328 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3329 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3333 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3335 emit_move_insn (addr_rtx, r);
3338 build (PLUS_EXPR, TREE_TYPE (t), t,
3339 build_int_2 (rsize * UNITS_PER_WORD, 0));
3340 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3341 TREE_SIDE_EFFECTS (t) = 1;
3342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3345 emit_label (lab_over);
3349 r = gen_rtx_MEM (Pmode, addr_rtx);
3350 set_mem_alias_set (r, get_varargs_alias_set ());
3351 emit_move_insn (addr_rtx, r);
3357 /* Return nonzero if OP is either a i387 or SSE fp register. */
3359 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3361 return ANY_FP_REG_P (op);
3364 /* Return nonzero if OP is an i387 fp register. */
3366 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3368 return FP_REG_P (op);
3371 /* Return nonzero if OP is a non-fp register_operand. */
3373 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3375 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3378 /* Return nonzero if OP is a register operand other than an
3379 i387 fp register. */
3381 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3383 return register_operand (op, mode) && !FP_REG_P (op);
3386 /* Return nonzero if OP is general operand representable on x86_64. */
3389 x86_64_general_operand (rtx op, enum machine_mode mode)
3392 return general_operand (op, mode);
3393 if (nonimmediate_operand (op, mode))
3395 return x86_64_sign_extended_value (op);
3398 /* Return nonzero if OP is general operand representable on x86_64
3399 as either sign extended or zero extended constant. */
3402 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3405 return general_operand (op, mode);
3406 if (nonimmediate_operand (op, mode))
3408 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3411 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3414 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3417 return nonmemory_operand (op, mode);
3418 if (register_operand (op, mode))
3420 return x86_64_sign_extended_value (op);
3423 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3426 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3428 if (!TARGET_64BIT || !flag_pic)
3429 return nonmemory_operand (op, mode);
3430 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3432 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3437 /* Return nonzero if OPNUM's MEM should be matched
3438 in movabs* patterns. */
3441 ix86_check_movabs (rtx insn, int opnum)
3445 set = PATTERN (insn);
3446 if (GET_CODE (set) == PARALLEL)
3447 set = XVECEXP (set, 0, 0);
3448 if (GET_CODE (set) != SET)
3450 mem = XEXP (set, opnum);
3451 while (GET_CODE (mem) == SUBREG)
3452 mem = SUBREG_REG (mem);
3453 if (GET_CODE (mem) != MEM)
3455 return (volatile_ok || !MEM_VOLATILE_P (mem));
3458 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3461 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3464 return nonmemory_operand (op, mode);
3465 if (register_operand (op, mode))
3467 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3470 /* Return nonzero if OP is immediate operand representable on x86_64. */
3473 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3476 return immediate_operand (op, mode);
3477 return x86_64_sign_extended_value (op);
3480 /* Return nonzero if OP is immediate operand representable on x86_64. */
3483 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3485 return x86_64_zero_extended_value (op);
3488 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3489 for shift & compare patterns, as shifting by 0 does not change flags),
3490 else return zero. */
3493 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3495 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3498 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3499 reference and a constant. */
3502 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3504 switch (GET_CODE (op))
3512 if (GET_CODE (op) == SYMBOL_REF
3513 || GET_CODE (op) == LABEL_REF
3514 || (GET_CODE (op) == UNSPEC
3515 && (XINT (op, 1) == UNSPEC_GOT
3516 || XINT (op, 1) == UNSPEC_GOTOFF
3517 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3519 if (GET_CODE (op) != PLUS
3520 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3524 if (GET_CODE (op) == SYMBOL_REF
3525 || GET_CODE (op) == LABEL_REF)
3527 /* Only @GOTOFF gets offsets. */
3528 if (GET_CODE (op) != UNSPEC
3529 || XINT (op, 1) != UNSPEC_GOTOFF)
3532 op = XVECEXP (op, 0, 0);
3533 if (GET_CODE (op) == SYMBOL_REF
3534 || GET_CODE (op) == LABEL_REF)
3543 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3546 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3548 if (GET_CODE (op) != CONST)
3553 if (GET_CODE (op) == UNSPEC
3554 && XINT (op, 1) == UNSPEC_GOTPCREL)
3556 if (GET_CODE (op) == PLUS
3557 && GET_CODE (XEXP (op, 0)) == UNSPEC
3558 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3563 if (GET_CODE (op) == UNSPEC)
3565 if (GET_CODE (op) != PLUS
3566 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3569 if (GET_CODE (op) == UNSPEC)
3575 /* Return true if OP is a symbolic operand that resolves locally. */
3578 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3580 if (GET_CODE (op) == CONST
3581 && GET_CODE (XEXP (op, 0)) == PLUS
3582 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3583 op = XEXP (XEXP (op, 0), 0);
3585 if (GET_CODE (op) == LABEL_REF)
3588 if (GET_CODE (op) != SYMBOL_REF)
3591 if (SYMBOL_REF_LOCAL_P (op))
3594 /* There is, however, a not insubstantial body of code in the rest of
3595 the compiler that assumes it can just stick the results of
3596 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3597 /* ??? This is a hack. Should update the body of the compiler to
3598 always create a DECL an invoke targetm.encode_section_info. */
3599 if (strncmp (XSTR (op, 0), internal_label_prefix,
3600 internal_label_prefix_len) == 0)
3606 /* Test for various thread-local symbols. */
3609 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3611 if (GET_CODE (op) != SYMBOL_REF)
3613 return SYMBOL_REF_TLS_MODEL (op);
3617 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3619 if (GET_CODE (op) != SYMBOL_REF)
3621 return SYMBOL_REF_TLS_MODEL (op) == kind;
3625 global_dynamic_symbolic_operand (rtx op,
3626 enum machine_mode mode ATTRIBUTE_UNUSED)
3628 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3632 local_dynamic_symbolic_operand (rtx op,
3633 enum machine_mode mode ATTRIBUTE_UNUSED)
3635 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3639 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3641 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3645 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3647 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3650 /* Test for a valid operand for a call instruction. Don't allow the
3651 arg pointer register or virtual regs since they may decay into
3652 reg + const, which the patterns can't handle. */
3655 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3657 /* Disallow indirect through a virtual register. This leads to
3658 compiler aborts when trying to eliminate them. */
3659 if (GET_CODE (op) == REG
3660 && (op == arg_pointer_rtx
3661 || op == frame_pointer_rtx
3662 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3663 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3666 /* Disallow `call 1234'. Due to varying assembler lameness this
3667 gets either rejected or translated to `call .+1234'. */
3668 if (GET_CODE (op) == CONST_INT)
3671 /* Explicitly allow SYMBOL_REF even if pic. */
3672 if (GET_CODE (op) == SYMBOL_REF)
3675 /* Otherwise we can allow any general_operand in the address. */
3676 return general_operand (op, Pmode);
3679 /* Test for a valid operand for a call instruction. Don't allow the
3680 arg pointer register or virtual regs since they may decay into
3681 reg + const, which the patterns can't handle. */
3684 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3686 /* Disallow indirect through a virtual register. This leads to
3687 compiler aborts when trying to eliminate them. */
3688 if (GET_CODE (op) == REG
3689 && (op == arg_pointer_rtx
3690 || op == frame_pointer_rtx
3691 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3692 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3695 /* Explicitly allow SYMBOL_REF even if pic. */
3696 if (GET_CODE (op) == SYMBOL_REF)
3699 /* Otherwise we can only allow register operands. */
3700 return register_operand (op, Pmode);
3704 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3706 if (GET_CODE (op) == CONST
3707 && GET_CODE (XEXP (op, 0)) == PLUS
3708 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3709 op = XEXP (XEXP (op, 0), 0);
3710 return GET_CODE (op) == SYMBOL_REF;
3713 /* Match exactly zero and one. */
3716 const0_operand (rtx op, enum machine_mode mode)
3718 return op == CONST0_RTX (mode);
3722 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3724 return op == const1_rtx;
3727 /* Match 2, 4, or 8. Used for leal multiplicands. */
3730 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3732 return (GET_CODE (op) == CONST_INT
3733 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3737 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3739 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3743 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3745 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3749 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3751 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3755 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3757 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3761 /* True if this is a constant appropriate for an increment or decrement. */
3764 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3766 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3767 registers, since carry flag is not set. */
3768 if (TARGET_PENTIUM4 && !optimize_size)
3770 return op == const1_rtx || op == constm1_rtx;
3773 /* Return nonzero if OP is acceptable as operand of DImode shift
3777 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3780 return nonimmediate_operand (op, mode);
3782 return register_operand (op, mode);
3785 /* Return false if this is the stack pointer, or any other fake
3786 register eliminable to the stack pointer. Otherwise, this is
3789 This is used to prevent esp from being used as an index reg.
3790 Which would only happen in pathological cases. */
3793 reg_no_sp_operand (rtx op, enum machine_mode mode)
3796 if (GET_CODE (t) == SUBREG)
3798 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3801 return register_operand (op, mode);
3805 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3807 return MMX_REG_P (op);
3810 /* Return false if this is any eliminable register. Otherwise
3814 general_no_elim_operand (rtx op, enum machine_mode mode)
3817 if (GET_CODE (t) == SUBREG)
3819 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3820 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3821 || t == virtual_stack_dynamic_rtx)
3824 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3825 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3828 return general_operand (op, mode);
3831 /* Return false if this is any eliminable register. Otherwise
3832 register_operand or const_int. */
3835 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3838 if (GET_CODE (t) == SUBREG)
3840 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3841 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3842 || t == virtual_stack_dynamic_rtx)
3845 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3848 /* Return false if this is any eliminable register or stack register,
3849 otherwise work like register_operand. */
3852 index_register_operand (rtx op, enum machine_mode mode)
3855 if (GET_CODE (t) == SUBREG)
3859 if (t == arg_pointer_rtx
3860 || t == frame_pointer_rtx
3861 || t == virtual_incoming_args_rtx
3862 || t == virtual_stack_vars_rtx
3863 || t == virtual_stack_dynamic_rtx
3864 || REGNO (t) == STACK_POINTER_REGNUM)
3867 return general_operand (op, mode);
3870 /* Return true if op is a Q_REGS class register. */
3873 q_regs_operand (rtx op, enum machine_mode mode)
3875 if (mode != VOIDmode && GET_MODE (op) != mode)
3877 if (GET_CODE (op) == SUBREG)
3878 op = SUBREG_REG (op);
3879 return ANY_QI_REG_P (op);
3882 /* Return true if op is an flags register. */
3885 flags_reg_operand (rtx op, enum machine_mode mode)
3887 if (mode != VOIDmode && GET_MODE (op) != mode)
3889 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3892 /* Return true if op is a NON_Q_REGS class register. */
3895 non_q_regs_operand (rtx op, enum machine_mode mode)
3897 if (mode != VOIDmode && GET_MODE (op) != mode)
3899 if (GET_CODE (op) == SUBREG)
3900 op = SUBREG_REG (op);
3901 return NON_QI_REG_P (op);
3905 zero_extended_scalar_load_operand (rtx op,
3906 enum machine_mode mode ATTRIBUTE_UNUSED)
3909 if (GET_CODE (op) != MEM)
3911 op = maybe_get_pool_constant (op);
3914 if (GET_CODE (op) != CONST_VECTOR)
3917 (GET_MODE_SIZE (GET_MODE (op)) /
3918 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3919 for (n_elts--; n_elts > 0; n_elts--)
3921 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3922 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3928 /* Return 1 when OP is operand acceptable for standard SSE move. */
3930 vector_move_operand (rtx op, enum machine_mode mode)
3932 if (nonimmediate_operand (op, mode))
3934 if (GET_MODE (op) != mode && mode != VOIDmode)
3936 return (op == CONST0_RTX (GET_MODE (op)));
3939 /* Return true if op if a valid address, and does not contain
3940 a segment override. */
3943 no_seg_address_operand (rtx op, enum machine_mode mode)
3945 struct ix86_address parts;
3947 if (! address_operand (op, mode))
3950 if (! ix86_decompose_address (op, &parts))
3953 return parts.seg == SEG_DEFAULT;
3956 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3959 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3961 enum rtx_code code = GET_CODE (op);
3964 /* Operations supported directly. */
3974 /* These are equivalent to ones above in non-IEEE comparisons. */
3981 return !TARGET_IEEE_FP;
3986 /* Return 1 if OP is a valid comparison operator in valid mode. */
3988 ix86_comparison_operator (rtx op, enum machine_mode mode)
3990 enum machine_mode inmode;
3991 enum rtx_code code = GET_CODE (op);
3992 if (mode != VOIDmode && GET_MODE (op) != mode)
3994 if (GET_RTX_CLASS (code) != '<')
3996 inmode = GET_MODE (XEXP (op, 0));
3998 if (inmode == CCFPmode || inmode == CCFPUmode)
4000 enum rtx_code second_code, bypass_code;
4001 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4002 return (bypass_code == NIL && second_code == NIL);
4009 if (inmode == CCmode || inmode == CCGCmode
4010 || inmode == CCGOCmode || inmode == CCNOmode)
4013 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4014 if (inmode == CCmode)
4018 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4026 /* Return 1 if OP is a valid comparison operator testing carry flag
4029 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4031 enum machine_mode inmode;
4032 enum rtx_code code = GET_CODE (op);
4034 if (mode != VOIDmode && GET_MODE (op) != mode)
4036 if (GET_RTX_CLASS (code) != '<')
4038 inmode = GET_MODE (XEXP (op, 0));
4039 if (GET_CODE (XEXP (op, 0)) != REG
4040 || REGNO (XEXP (op, 0)) != 17
4041 || XEXP (op, 1) != const0_rtx)
4044 if (inmode == CCFPmode || inmode == CCFPUmode)
4046 enum rtx_code second_code, bypass_code;
4048 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4049 if (bypass_code != NIL || second_code != NIL)
4051 code = ix86_fp_compare_code_to_integer (code);
4053 else if (inmode != CCmode)
4058 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4061 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4063 enum machine_mode inmode;
4064 enum rtx_code code = GET_CODE (op);
4066 if (mode != VOIDmode && GET_MODE (op) != mode)
4068 if (GET_RTX_CLASS (code) != '<')
4070 inmode = GET_MODE (XEXP (op, 0));
4071 if (inmode == CCFPmode || inmode == CCFPUmode)
4073 enum rtx_code second_code, bypass_code;
4075 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4076 if (bypass_code != NIL || second_code != NIL)
4078 code = ix86_fp_compare_code_to_integer (code);
4080 /* i387 supports just limited amount of conditional codes. */
4083 case LTU: case GTU: case LEU: case GEU:
4084 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4087 case ORDERED: case UNORDERED:
4095 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4098 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4100 switch (GET_CODE (op))
4103 /* Modern CPUs have same latency for HImode and SImode multiply,
4104 but 386 and 486 do HImode multiply faster. */
4105 return ix86_tune > PROCESSOR_I486;
4117 /* Nearly general operand, but accept any const_double, since we wish
4118 to be able to drop them into memory rather than have them get pulled
4122 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4124 if (mode != VOIDmode && mode != GET_MODE (op))
4126 if (GET_CODE (op) == CONST_DOUBLE)
4128 return general_operand (op, mode);
4131 /* Match an SI or HImode register for a zero_extract. */
4134 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4137 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4138 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4141 if (!register_operand (op, VOIDmode))
4144 /* Be careful to accept only registers having upper parts. */
4145 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4146 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4149 /* Return 1 if this is a valid binary floating-point operation.
4150 OP is the expression matched, and MODE is its mode. */
4153 binary_fp_operator (rtx op, enum machine_mode mode)
4155 if (mode != VOIDmode && mode != GET_MODE (op))
4158 switch (GET_CODE (op))
4164 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4172 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4174 return GET_CODE (op) == MULT;
4178 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4180 return GET_CODE (op) == DIV;
4184 arith_or_logical_operator (rtx op, enum machine_mode mode)
4186 return ((mode == VOIDmode || GET_MODE (op) == mode)
4187 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4188 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4191 /* Returns 1 if OP is memory operand with a displacement. */
4194 memory_displacement_operand (rtx op, enum machine_mode mode)
4196 struct ix86_address parts;
4198 if (! memory_operand (op, mode))
4201 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4204 return parts.disp != NULL_RTX;
4207 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4208 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4210 ??? It seems likely that this will only work because cmpsi is an
4211 expander, and no actual insns use this. */
4214 cmpsi_operand (rtx op, enum machine_mode mode)
4216 if (nonimmediate_operand (op, mode))
4219 if (GET_CODE (op) == AND
4220 && GET_MODE (op) == SImode
4221 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4222 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4223 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4224 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4225 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4226 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4232 /* Returns 1 if OP is memory operand that can not be represented by the
4236 long_memory_operand (rtx op, enum machine_mode mode)
4238 if (! memory_operand (op, mode))
4241 return memory_address_length (op) != 0;
4244 /* Return nonzero if the rtx is known aligned. */
4247 aligned_operand (rtx op, enum machine_mode mode)
4249 struct ix86_address parts;
4251 if (!general_operand (op, mode))
4254 /* Registers and immediate operands are always "aligned". */
4255 if (GET_CODE (op) != MEM)
4258 /* Don't even try to do any aligned optimizations with volatiles. */
4259 if (MEM_VOLATILE_P (op))
4264 /* Pushes and pops are only valid on the stack pointer. */
4265 if (GET_CODE (op) == PRE_DEC
4266 || GET_CODE (op) == POST_INC)
4269 /* Decode the address. */
4270 if (! ix86_decompose_address (op, &parts))
4273 /* Look for some component that isn't known to be aligned. */
4277 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4282 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4287 if (GET_CODE (parts.disp) != CONST_INT
4288 || (INTVAL (parts.disp) & 3) != 0)
4292 /* Didn't find one -- this must be an aligned address. */
4296 /* Initialize the table of extra 80387 mathematical constants. */
4299 init_ext_80387_constants (void)
4301 static const char * cst[5] =
4303 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4304 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4305 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4306 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4307 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4311 for (i = 0; i < 5; i++)
4313 real_from_string (&ext_80387_constants_table[i], cst[i]);
4314 /* Ensure each constant is rounded to XFmode precision. */
4315 real_convert (&ext_80387_constants_table[i],
4316 XFmode, &ext_80387_constants_table[i]);
4319 ext_80387_constants_init = 1;
4322 /* Return true if the constant is something that can be loaded with
4323 a special instruction. */
4326 standard_80387_constant_p (rtx x)
4328 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4331 if (x == CONST0_RTX (GET_MODE (x)))
4333 if (x == CONST1_RTX (GET_MODE (x)))
4336 /* For XFmode constants, try to find a special 80387 instruction when
4337 optimizing for size or on those CPUs that benefit from them. */
4338 if (GET_MODE (x) == XFmode
4339 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4344 if (! ext_80387_constants_init)
4345 init_ext_80387_constants ();
4347 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4348 for (i = 0; i < 5; i++)
4349 if (real_identical (&r, &ext_80387_constants_table[i]))
4356 /* Return the opcode of the special instruction to be used to load
4360 standard_80387_constant_opcode (rtx x)
4362 switch (standard_80387_constant_p (x))
4382 /* Return the CONST_DOUBLE representing the 80387 constant that is
4383 loaded by the specified special instruction. The argument IDX
4384 matches the return value from standard_80387_constant_p. */
4387 standard_80387_constant_rtx (int idx)
4391 if (! ext_80387_constants_init)
4392 init_ext_80387_constants ();
4408 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4412 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4415 standard_sse_constant_p (rtx x)
4417 if (x == const0_rtx)
4419 return (x == CONST0_RTX (GET_MODE (x)));
4422 /* Returns 1 if OP contains a symbol reference */
4425 symbolic_reference_mentioned_p (rtx op)
4430 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4433 fmt = GET_RTX_FORMAT (GET_CODE (op));
4434 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4440 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4441 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4445 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4452 /* Return 1 if it is appropriate to emit `ret' instructions in the
4453 body of a function. Do this only if the epilogue is simple, needing a
4454 couple of insns. Prior to reloading, we can't tell how many registers
4455 must be saved, so return 0 then. Return 0 if there is no frame
4456 marker to de-allocate.
4458 If NON_SAVING_SETJMP is defined and true, then it is not possible
4459 for the epilogue to be simple, so return 0. This is a special case
4460 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4461 until final, but jump_optimize may need to know sooner if a
4465 ix86_can_use_return_insn_p (void)
4467 struct ix86_frame frame;
4469 #ifdef NON_SAVING_SETJMP
4470 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4474 if (! reload_completed || frame_pointer_needed)
4477 /* Don't allow more than 32 pop, since that's all we can do
4478 with one instruction. */
4479 if (current_function_pops_args
4480 && current_function_args_size >= 32768)
4483 ix86_compute_frame_layout (&frame);
4484 return frame.to_allocate == 0 && frame.nregs == 0;
4487 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4489 x86_64_sign_extended_value (rtx value)
4491 switch (GET_CODE (value))
4493 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4494 to be at least 32 and this all acceptable constants are
4495 represented as CONST_INT. */
4497 if (HOST_BITS_PER_WIDE_INT == 32)
4501 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4502 return trunc_int_for_mode (val, SImode) == val;
4506 /* For certain code models, the symbolic references are known to fit.
4507 in CM_SMALL_PIC model we know it fits if it is local to the shared
4508 library. Don't count TLS SYMBOL_REFs here, since they should fit
4509 only if inside of UNSPEC handled below. */
4511 /* TLS symbols are not constant. */
4512 if (tls_symbolic_operand (value, Pmode))
4514 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4516 /* For certain code models, the code is near as well. */
4518 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4519 || ix86_cmodel == CM_KERNEL);
4521 /* We also may accept the offsetted memory references in certain special
4524 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4525 switch (XINT (XEXP (value, 0), 1))
4527 case UNSPEC_GOTPCREL:
4529 case UNSPEC_GOTNTPOFF:
4535 if (GET_CODE (XEXP (value, 0)) == PLUS)
4537 rtx op1 = XEXP (XEXP (value, 0), 0);
4538 rtx op2 = XEXP (XEXP (value, 0), 1);
4539 HOST_WIDE_INT offset;
4541 if (ix86_cmodel == CM_LARGE)
4543 if (GET_CODE (op2) != CONST_INT)
4545 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4546 switch (GET_CODE (op1))
4549 /* For CM_SMALL assume that latest object is 16MB before
4550 end of 31bits boundary. We may also accept pretty
4551 large negative constants knowing that all objects are
4552 in the positive half of address space. */
4553 if (ix86_cmodel == CM_SMALL
4554 && offset < 16*1024*1024
4555 && trunc_int_for_mode (offset, SImode) == offset)
4557 /* For CM_KERNEL we know that all object resist in the
4558 negative half of 32bits address space. We may not
4559 accept negative offsets, since they may be just off
4560 and we may accept pretty large positive ones. */
4561 if (ix86_cmodel == CM_KERNEL
4563 && trunc_int_for_mode (offset, SImode) == offset)
4567 /* These conditions are similar to SYMBOL_REF ones, just the
4568 constraints for code models differ. */
4569 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4570 && offset < 16*1024*1024
4571 && trunc_int_for_mode (offset, SImode) == offset)
4573 if (ix86_cmodel == CM_KERNEL
4575 && trunc_int_for_mode (offset, SImode) == offset)
4579 switch (XINT (op1, 1))
4584 && trunc_int_for_mode (offset, SImode) == offset)
4598 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4600 x86_64_zero_extended_value (rtx value)
4602 switch (GET_CODE (value))
4605 if (HOST_BITS_PER_WIDE_INT == 32)
4606 return (GET_MODE (value) == VOIDmode
4607 && !CONST_DOUBLE_HIGH (value));
4611 if (HOST_BITS_PER_WIDE_INT == 32)
4612 return INTVAL (value) >= 0;
4614 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4617 /* For certain code models, the symbolic references are known to fit. */
4619 /* TLS symbols are not constant. */
4620 if (tls_symbolic_operand (value, Pmode))
4622 return ix86_cmodel == CM_SMALL;
4624 /* For certain code models, the code is near as well. */
4626 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4628 /* We also may accept the offsetted memory references in certain special
4631 if (GET_CODE (XEXP (value, 0)) == PLUS)
4633 rtx op1 = XEXP (XEXP (value, 0), 0);
4634 rtx op2 = XEXP (XEXP (value, 0), 1);
4636 if (ix86_cmodel == CM_LARGE)
4638 switch (GET_CODE (op1))
4642 /* For small code model we may accept pretty large positive
4643 offsets, since one bit is available for free. Negative
4644 offsets are limited by the size of NULL pointer area
4645 specified by the ABI. */
4646 if (ix86_cmodel == CM_SMALL
4647 && GET_CODE (op2) == CONST_INT
4648 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4649 && (trunc_int_for_mode (INTVAL (op2), SImode)
4652 /* ??? For the kernel, we may accept adjustment of
4653 -0x10000000, since we know that it will just convert
4654 negative address space to positive, but perhaps this
4655 is not worthwhile. */
4658 /* These conditions are similar to SYMBOL_REF ones, just the
4659 constraints for code models differ. */
4660 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4661 && GET_CODE (op2) == CONST_INT
4662 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4663 && (trunc_int_for_mode (INTVAL (op2), SImode)
4677 /* Value should be nonzero if functions must have frame pointers.
4678 Zero means the frame pointer need not be set up (and parms may
4679 be accessed via the stack pointer) in functions that seem suitable. */
4682 ix86_frame_pointer_required (void)
4684 /* If we accessed previous frames, then the generated code expects
4685 to be able to access the saved ebp value in our frame. */
4686 if (cfun->machine->accesses_prev_frame)
4689 /* Several x86 os'es need a frame pointer for other reasons,
4690 usually pertaining to setjmp. */
4691 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4694 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4695 the frame pointer by default. Turn it back on now if we've not
4696 got a leaf function. */
4697 if (TARGET_OMIT_LEAF_FRAME_POINTER
4698 && (!current_function_is_leaf))
4701 if (current_function_profile)
4707 /* Record that the current function accesses previous call frames. */
4710 ix86_setup_frame_addresses (void)
4712 cfun->machine->accesses_prev_frame = 1;
4715 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4716 # define USE_HIDDEN_LINKONCE 1
4718 # define USE_HIDDEN_LINKONCE 0
4721 static int pic_labels_used;
4723 /* Fills in the label name that should be used for a pc thunk for
4724 the given register. */
4727 get_pc_thunk_name (char name[32], unsigned int regno)
4729 if (USE_HIDDEN_LINKONCE)
4730 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4732 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4736 /* This function generates code for -fpic that loads %ebx with
4737 the return address of the caller and then returns. */
4740 ix86_file_end (void)
4745 for (regno = 0; regno < 8; ++regno)
4749 if (! ((pic_labels_used >> regno) & 1))
4752 get_pc_thunk_name (name, regno);
4754 if (USE_HIDDEN_LINKONCE)
4758 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4760 TREE_PUBLIC (decl) = 1;
4761 TREE_STATIC (decl) = 1;
4762 DECL_ONE_ONLY (decl) = 1;
4764 (*targetm.asm_out.unique_section) (decl, 0);
4765 named_section (decl, NULL, 0);
4767 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4768 fputs ("\t.hidden\t", asm_out_file);
4769 assemble_name (asm_out_file, name);
4770 fputc ('\n', asm_out_file);
4771 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4776 ASM_OUTPUT_LABEL (asm_out_file, name);
4779 xops[0] = gen_rtx_REG (SImode, regno);
4780 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4781 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4782 output_asm_insn ("ret", xops);
4785 if (NEED_INDICATE_EXEC_STACK)
4786 file_end_indicate_exec_stack ();
4789 /* Emit code for the SET_GOT patterns. */
4792 output_set_got (rtx dest)
4797 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4799 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4801 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4804 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4806 output_asm_insn ("call\t%a2", xops);
4809 /* Output the "canonical" label name ("Lxx$pb") here too. This
4810 is what will be referred to by the Mach-O PIC subsystem. */
4811 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4813 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4814 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4817 output_asm_insn ("pop{l}\t%0", xops);
4822 get_pc_thunk_name (name, REGNO (dest));
4823 pic_labels_used |= 1 << REGNO (dest);
4825 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4826 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4827 output_asm_insn ("call\t%X2", xops);
4830 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4831 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4832 else if (!TARGET_MACHO)
4833 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4838 /* Generate an "push" pattern for input ARG. */
4843 return gen_rtx_SET (VOIDmode,
4845 gen_rtx_PRE_DEC (Pmode,
4846 stack_pointer_rtx)),
4850 /* Return >= 0 if there is an unused call-clobbered register available
4851 for the entire function. */
4854 ix86_select_alt_pic_regnum (void)
4856 if (current_function_is_leaf && !current_function_profile)
4859 for (i = 2; i >= 0; --i)
4860 if (!regs_ever_live[i])
4864 return INVALID_REGNUM;
4867 /* Return 1 if we need to save REGNO. */
4869 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4871 if (pic_offset_table_rtx
4872 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4873 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4874 || current_function_profile
4875 || current_function_calls_eh_return
4876 || current_function_uses_const_pool))
4878 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4883 if (current_function_calls_eh_return && maybe_eh_return)
4888 unsigned test = EH_RETURN_DATA_REGNO (i);
4889 if (test == INVALID_REGNUM)
4896 return (regs_ever_live[regno]
4897 && !call_used_regs[regno]
4898 && !fixed_regs[regno]
4899 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4902 /* Return number of registers to be saved on the stack. */
4905 ix86_nsaved_regs (void)
4910 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4911 if (ix86_save_reg (regno, true))
4916 /* Return the offset between two registers, one to be eliminated, and the other
4917 its replacement, at the start of a routine. */
4920 ix86_initial_elimination_offset (int from, int to)
4922 struct ix86_frame frame;
4923 ix86_compute_frame_layout (&frame);
4925 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4926 return frame.hard_frame_pointer_offset;
4927 else if (from == FRAME_POINTER_REGNUM
4928 && to == HARD_FRAME_POINTER_REGNUM)
4929 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4932 if (to != STACK_POINTER_REGNUM)
4934 else if (from == ARG_POINTER_REGNUM)
4935 return frame.stack_pointer_offset;
4936 else if (from != FRAME_POINTER_REGNUM)
4939 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4943 /* Fill structure ix86_frame about frame of currently computed function. */
4946 ix86_compute_frame_layout (struct ix86_frame *frame)
4948 HOST_WIDE_INT total_size;
4949 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4950 HOST_WIDE_INT offset;
4951 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4952 HOST_WIDE_INT size = get_frame_size ();
4954 frame->nregs = ix86_nsaved_regs ();
4957 /* During reload iteration the amount of registers saved can change.
4958 Recompute the value as needed. Do not recompute when amount of registers
4959 didn't change as reload does mutiple calls to the function and does not
4960 expect the decision to change within single iteration. */
4962 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4964 int count = frame->nregs;
4966 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4967 /* The fast prologue uses move instead of push to save registers. This
4968 is significantly longer, but also executes faster as modern hardware
4969 can execute the moves in parallel, but can't do that for push/pop.
4971 Be careful about choosing what prologue to emit: When function takes
4972 many instructions to execute we may use slow version as well as in
4973 case function is known to be outside hot spot (this is known with
4974 feedback only). Weight the size of function by number of registers
4975 to save as it is cheap to use one or two push instructions but very
4976 slow to use many of them. */
4978 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4979 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4980 || (flag_branch_probabilities
4981 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4982 cfun->machine->use_fast_prologue_epilogue = false;
4984 cfun->machine->use_fast_prologue_epilogue
4985 = !expensive_function_p (count);
4987 if (TARGET_PROLOGUE_USING_MOVE
4988 && cfun->machine->use_fast_prologue_epilogue)
4989 frame->save_regs_using_mov = true;
4991 frame->save_regs_using_mov = false;
4994 /* Skip return address and saved base pointer. */
4995 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4997 frame->hard_frame_pointer_offset = offset;
4999 /* Do some sanity checking of stack_alignment_needed and
5000 preferred_alignment, since i386 port is the only using those features
5001 that may break easily. */
5003 if (size && !stack_alignment_needed)
5005 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5007 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5009 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5012 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5013 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5015 /* Register save area */
5016 offset += frame->nregs * UNITS_PER_WORD;
5019 if (ix86_save_varrargs_registers)
5021 offset += X86_64_VARARGS_SIZE;
5022 frame->va_arg_size = X86_64_VARARGS_SIZE;
5025 frame->va_arg_size = 0;
5027 /* Align start of frame for local function. */
5028 frame->padding1 = ((offset + stack_alignment_needed - 1)
5029 & -stack_alignment_needed) - offset;
5031 offset += frame->padding1;
5033 /* Frame pointer points here. */
5034 frame->frame_pointer_offset = offset;
5038 /* Add outgoing arguments area. Can be skipped if we eliminated
5039 all the function calls as dead code.
5040 Skipping is however impossible when function calls alloca. Alloca
5041 expander assumes that last current_function_outgoing_args_size
5042 of stack frame are unused. */
5043 if (ACCUMULATE_OUTGOING_ARGS
5044 && (!current_function_is_leaf || current_function_calls_alloca))
5046 offset += current_function_outgoing_args_size;
5047 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5050 frame->outgoing_arguments_size = 0;
5052 /* Align stack boundary. Only needed if we're calling another function
5054 if (!current_function_is_leaf || current_function_calls_alloca)
5055 frame->padding2 = ((offset + preferred_alignment - 1)
5056 & -preferred_alignment) - offset;
5058 frame->padding2 = 0;
5060 offset += frame->padding2;
5062 /* We've reached end of stack frame. */
5063 frame->stack_pointer_offset = offset;
5065 /* Size prologue needs to allocate. */
5066 frame->to_allocate =
5067 (size + frame->padding1 + frame->padding2
5068 + frame->outgoing_arguments_size + frame->va_arg_size);
5070 if ((!frame->to_allocate && frame->nregs <= 1)
5071 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5072 frame->save_regs_using_mov = false;
5074 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5075 && current_function_is_leaf)
5077 frame->red_zone_size = frame->to_allocate;
5078 if (frame->save_regs_using_mov)
5079 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5080 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5081 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5084 frame->red_zone_size = 0;
5085 frame->to_allocate -= frame->red_zone_size;
5086 frame->stack_pointer_offset -= frame->red_zone_size;
5088 fprintf (stderr, "nregs: %i\n", frame->nregs);
5089 fprintf (stderr, "size: %i\n", size);
5090 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5091 fprintf (stderr, "padding1: %i\n", frame->padding1);
5092 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5093 fprintf (stderr, "padding2: %i\n", frame->padding2);
5094 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5095 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5096 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5097 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5098 frame->hard_frame_pointer_offset);
5099 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5103 /* Emit code to save registers in the prologue. */
5106 ix86_emit_save_regs (void)
5111 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5112 if (ix86_save_reg (regno, true))
5114 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5115 RTX_FRAME_RELATED_P (insn) = 1;
5119 /* Emit code to save registers using MOV insns. First register
5120 is restored from POINTER + OFFSET. */
5122 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5127 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5128 if (ix86_save_reg (regno, true))
5130 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5132 gen_rtx_REG (Pmode, regno));
5133 RTX_FRAME_RELATED_P (insn) = 1;
5134 offset += UNITS_PER_WORD;
5138 /* Expand prologue or epilogue stack adjustment.
5139 The pattern exist to put a dependency on all ebp-based memory accesses.
5140 STYLE should be negative if instructions should be marked as frame related,
5141 zero if %r11 register is live and cannot be freely used and positive
5145 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5150 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5151 else if (x86_64_immediate_operand (offset, DImode))
5152 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5156 /* r11 is used by indirect sibcall return as well, set before the
5157 epilogue and used after the epilogue. ATM indirect sibcall
5158 shouldn't be used together with huge frame sizes in one
5159 function because of the frame_size check in sibcall.c. */
5162 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5163 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5165 RTX_FRAME_RELATED_P (insn) = 1;
5166 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5170 RTX_FRAME_RELATED_P (insn) = 1;
5173 /* Expand the prologue into a bunch of separate insns. */
5176 ix86_expand_prologue (void)
5180 struct ix86_frame frame;
5181 HOST_WIDE_INT allocate;
5183 ix86_compute_frame_layout (&frame);
5185 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5186 slower on all targets. Also sdb doesn't like it. */
5188 if (frame_pointer_needed)
5190 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5191 RTX_FRAME_RELATED_P (insn) = 1;
5193 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5194 RTX_FRAME_RELATED_P (insn) = 1;
5197 allocate = frame.to_allocate;
5199 if (!frame.save_regs_using_mov)
5200 ix86_emit_save_regs ();
5202 allocate += frame.nregs * UNITS_PER_WORD;
5204 /* When using red zone we may start register saving before allocating
5205 the stack frame saving one cycle of the prologue. */
5206 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5207 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5208 : stack_pointer_rtx,
5209 -frame.nregs * UNITS_PER_WORD);
5213 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5214 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5215 GEN_INT (-allocate), -1);
5218 /* Only valid for Win32. */
5219 rtx eax = gen_rtx_REG (SImode, 0);
5220 bool eax_live = ix86_eax_live_at_start_p ();
5227 emit_insn (gen_push (eax));
5231 insn = emit_move_insn (eax, GEN_INT (allocate));
5232 RTX_FRAME_RELATED_P (insn) = 1;
5234 insn = emit_insn (gen_allocate_stack_worker (eax));
5235 RTX_FRAME_RELATED_P (insn) = 1;
5239 rtx t = plus_constant (stack_pointer_rtx, allocate);
5240 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5244 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5246 if (!frame_pointer_needed || !frame.to_allocate)
5247 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5249 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5250 -frame.nregs * UNITS_PER_WORD);
5253 pic_reg_used = false;
5254 if (pic_offset_table_rtx
5255 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5256 || current_function_profile))
5258 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5260 if (alt_pic_reg_used != INVALID_REGNUM)
5261 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5263 pic_reg_used = true;
5268 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5270 /* Even with accurate pre-reload life analysis, we can wind up
5271 deleting all references to the pic register after reload.
5272 Consider if cross-jumping unifies two sides of a branch
5273 controlled by a comparison vs the only read from a global.
5274 In which case, allow the set_got to be deleted, though we're
5275 too late to do anything about the ebx save in the prologue. */
5276 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5279 /* Prevent function calls from be scheduled before the call to mcount.
5280 In the pic_reg_used case, make sure that the got load isn't deleted. */
5281 if (current_function_profile)
5282 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5285 /* Emit code to restore saved registers using MOV insns. First register
5286 is restored from POINTER + OFFSET. */
5288 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5289 int maybe_eh_return)
5292 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5294 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5295 if (ix86_save_reg (regno, maybe_eh_return))
5297 /* Ensure that adjust_address won't be forced to produce pointer
5298 out of range allowed by x86-64 instruction set. */
5299 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5303 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5304 emit_move_insn (r11, GEN_INT (offset));
5305 emit_insn (gen_adddi3 (r11, r11, pointer));
5306 base_address = gen_rtx_MEM (Pmode, r11);
5309 emit_move_insn (gen_rtx_REG (Pmode, regno),
5310 adjust_address (base_address, Pmode, offset));
5311 offset += UNITS_PER_WORD;
5315 /* Restore function stack, frame, and registers. */
5318 ix86_expand_epilogue (int style)
5321 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5322 struct ix86_frame frame;
5323 HOST_WIDE_INT offset;
5325 ix86_compute_frame_layout (&frame);
5327 /* Calculate start of saved registers relative to ebp. Special care
5328 must be taken for the normal return case of a function using
5329 eh_return: the eax and edx registers are marked as saved, but not
5330 restored along this path. */
5331 offset = frame.nregs;
5332 if (current_function_calls_eh_return && style != 2)
5334 offset *= -UNITS_PER_WORD;
5336 /* If we're only restoring one register and sp is not valid then
5337 using a move instruction to restore the register since it's
5338 less work than reloading sp and popping the register.
5340 The default code result in stack adjustment using add/lea instruction,
5341 while this code results in LEAVE instruction (or discrete equivalent),
5342 so it is profitable in some other cases as well. Especially when there
5343 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5344 and there is exactly one register to pop. This heuristic may need some
5345 tuning in future. */
5346 if ((!sp_valid && frame.nregs <= 1)
5347 || (TARGET_EPILOGUE_USING_MOVE
5348 && cfun->machine->use_fast_prologue_epilogue
5349 && (frame.nregs > 1 || frame.to_allocate))
5350 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5351 || (frame_pointer_needed && TARGET_USE_LEAVE
5352 && cfun->machine->use_fast_prologue_epilogue
5353 && frame.nregs == 1)
5354 || current_function_calls_eh_return)
5356 /* Restore registers. We can use ebp or esp to address the memory
5357 locations. If both are available, default to ebp, since offsets
5358 are known to be small. Only exception is esp pointing directly to the
5359 end of block of saved registers, where we may simplify addressing
5362 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5363 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5364 frame.to_allocate, style == 2);
5366 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5367 offset, style == 2);
5369 /* eh_return epilogues need %ecx added to the stack pointer. */
5372 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5374 if (frame_pointer_needed)
5376 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5377 tmp = plus_constant (tmp, UNITS_PER_WORD);
5378 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5380 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5381 emit_move_insn (hard_frame_pointer_rtx, tmp);
5383 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5388 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5389 tmp = plus_constant (tmp, (frame.to_allocate
5390 + frame.nregs * UNITS_PER_WORD));
5391 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5394 else if (!frame_pointer_needed)
5395 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5396 GEN_INT (frame.to_allocate
5397 + frame.nregs * UNITS_PER_WORD),
5399 /* If not an i386, mov & pop is faster than "leave". */
5400 else if (TARGET_USE_LEAVE || optimize_size
5401 || !cfun->machine->use_fast_prologue_epilogue)
5402 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5405 pro_epilogue_adjust_stack (stack_pointer_rtx,
5406 hard_frame_pointer_rtx,
5409 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5411 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5416 /* First step is to deallocate the stack frame so that we can
5417 pop the registers. */
5420 if (!frame_pointer_needed)
5422 pro_epilogue_adjust_stack (stack_pointer_rtx,
5423 hard_frame_pointer_rtx,
5424 GEN_INT (offset), style);
5426 else if (frame.to_allocate)
5427 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5428 GEN_INT (frame.to_allocate), style);
5430 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5431 if (ix86_save_reg (regno, false))
5434 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5436 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5438 if (frame_pointer_needed)
5440 /* Leave results in shorter dependency chains on CPUs that are
5441 able to grok it fast. */
5442 if (TARGET_USE_LEAVE)
5443 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5444 else if (TARGET_64BIT)
5445 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5447 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5451 /* Sibcall epilogues don't want a return instruction. */
5455 if (current_function_pops_args && current_function_args_size)
5457 rtx popc = GEN_INT (current_function_pops_args);
5459 /* i386 can only pop 64K bytes. If asked to pop more, pop
5460 return address, do explicit add, and jump indirectly to the
5463 if (current_function_pops_args >= 65536)
5465 rtx ecx = gen_rtx_REG (SImode, 2);
5467 /* There is no "pascal" calling convention in 64bit ABI. */
5471 emit_insn (gen_popsi1 (ecx));
5472 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5473 emit_jump_insn (gen_return_indirect_internal (ecx));
5476 emit_jump_insn (gen_return_pop_internal (popc));
5479 emit_jump_insn (gen_return_internal ());
5482 /* Reset from the function's potential modifications. */
5485 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5486 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5488 if (pic_offset_table_rtx)
5489 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5492 /* Extract the parts of an RTL expression that is a valid memory address
5493 for an instruction. Return 0 if the structure of the address is
5494 grossly off. Return -1 if the address contains ASHIFT, so it is not
5495 strictly valid, but still used for computing length of lea instruction. */
5498 ix86_decompose_address (rtx addr, struct ix86_address *out)
5500 rtx base = NULL_RTX;
5501 rtx index = NULL_RTX;
5502 rtx disp = NULL_RTX;
5503 HOST_WIDE_INT scale = 1;
5504 rtx scale_rtx = NULL_RTX;
5506 enum ix86_address_seg seg = SEG_DEFAULT;
5508 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5510 else if (GET_CODE (addr) == PLUS)
5520 addends[n++] = XEXP (op, 1);
5523 while (GET_CODE (op) == PLUS);
5528 for (i = n; i >= 0; --i)
5531 switch (GET_CODE (op))
5536 index = XEXP (op, 0);
5537 scale_rtx = XEXP (op, 1);
5541 if (XINT (op, 1) == UNSPEC_TP
5542 && TARGET_TLS_DIRECT_SEG_REFS
5543 && seg == SEG_DEFAULT)
5544 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5573 else if (GET_CODE (addr) == MULT)
5575 index = XEXP (addr, 0); /* index*scale */
5576 scale_rtx = XEXP (addr, 1);
5578 else if (GET_CODE (addr) == ASHIFT)
5582 /* We're called for lea too, which implements ashift on occasion. */
5583 index = XEXP (addr, 0);
5584 tmp = XEXP (addr, 1);
5585 if (GET_CODE (tmp) != CONST_INT)
5587 scale = INTVAL (tmp);
5588 if ((unsigned HOST_WIDE_INT) scale > 3)
5594 disp = addr; /* displacement */
5596 /* Extract the integral value of scale. */
5599 if (GET_CODE (scale_rtx) != CONST_INT)
5601 scale = INTVAL (scale_rtx);
5604 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5605 if (base && index && scale == 1
5606 && (index == arg_pointer_rtx
5607 || index == frame_pointer_rtx
5608 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5615 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5616 if ((base == hard_frame_pointer_rtx
5617 || base == frame_pointer_rtx
5618 || base == arg_pointer_rtx) && !disp)
5621 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5622 Avoid this by transforming to [%esi+0]. */
5623 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5624 && base && !index && !disp
5626 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5629 /* Special case: encode reg+reg instead of reg*2. */
5630 if (!base && index && scale && scale == 2)
5631 base = index, scale = 1;
5633 /* Special case: scaling cannot be encoded without base or displacement. */
5634 if (!base && !disp && index && scale != 1)
5646 /* Return cost of the memory address x.
5647 For i386, it is better to use a complex address than let gcc copy
5648 the address into a reg and make a new pseudo. But not if the address
5649 requires to two regs - that would mean more pseudos with longer
5652 ix86_address_cost (rtx x)
5654 struct ix86_address parts;
5657 if (!ix86_decompose_address (x, &parts))
5660 /* More complex memory references are better. */
5661 if (parts.disp && parts.disp != const0_rtx)
5663 if (parts.seg != SEG_DEFAULT)
5666 /* Attempt to minimize number of registers in the address. */
5668 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5670 && (!REG_P (parts.index)
5671 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5675 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5677 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5678 && parts.base != parts.index)
5681 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5682 since it's predecode logic can't detect the length of instructions
5683 and it degenerates to vector decoded. Increase cost of such
5684 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5685 to split such addresses or even refuse such addresses at all.
5687 Following addressing modes are affected:
5692 The first and last case may be avoidable by explicitly coding the zero in
5693 memory address, but I don't have AMD-K6 machine handy to check this
5697 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5698 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5699 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5705 /* If X is a machine specific address (i.e. a symbol or label being
5706 referenced as a displacement from the GOT implemented using an
5707 UNSPEC), then return the base term. Otherwise return X. */
5710 ix86_find_base_term (rtx x)
5716 if (GET_CODE (x) != CONST)
5719 if (GET_CODE (term) == PLUS
5720 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5721 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5722 term = XEXP (term, 0);
5723 if (GET_CODE (term) != UNSPEC
5724 || XINT (term, 1) != UNSPEC_GOTPCREL)
5727 term = XVECEXP (term, 0, 0);
5729 if (GET_CODE (term) != SYMBOL_REF
5730 && GET_CODE (term) != LABEL_REF)
5736 term = ix86_delegitimize_address (x);
5738 if (GET_CODE (term) != SYMBOL_REF
5739 && GET_CODE (term) != LABEL_REF)
5745 /* Determine if a given RTX is a valid constant. We already know this
5746 satisfies CONSTANT_P. */
5749 legitimate_constant_p (rtx x)
5753 switch (GET_CODE (x))
5756 /* TLS symbols are not constant. */
5757 if (tls_symbolic_operand (x, Pmode))
5762 inner = XEXP (x, 0);
5764 /* Offsets of TLS symbols are never valid.
5765 Discourage CSE from creating them. */
5766 if (GET_CODE (inner) == PLUS
5767 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5770 if (GET_CODE (inner) == PLUS)
5772 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5774 inner = XEXP (inner, 0);
5777 /* Only some unspecs are valid as "constants". */
5778 if (GET_CODE (inner) == UNSPEC)
5779 switch (XINT (inner, 1))
5783 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5785 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5795 /* Otherwise we handle everything else in the move patterns. */
5799 /* Determine if it's legal to put X into the constant pool. This
5800 is not possible for the address of thread-local symbols, which
5801 is checked above. */
5804 ix86_cannot_force_const_mem (rtx x)
5806 return !legitimate_constant_p (x);
5809 /* Determine if a given RTX is a valid constant address. */
5812 constant_address_p (rtx x)
5814 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5817 /* Nonzero if the constant value X is a legitimate general operand
5818 when generating PIC code. It is given that flag_pic is on and
5819 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5822 legitimate_pic_operand_p (rtx x)
5826 switch (GET_CODE (x))
5829 inner = XEXP (x, 0);
5831 /* Only some unspecs are valid as "constants". */
5832 if (GET_CODE (inner) == UNSPEC)
5833 switch (XINT (inner, 1))
5836 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5844 return legitimate_pic_address_disp_p (x);
5851 /* Determine if a given CONST RTX is a valid memory displacement
5855 legitimate_pic_address_disp_p (rtx disp)
5859 /* In 64bit mode we can allow direct addresses of symbols and labels
5860 when they are not dynamic symbols. */
5863 /* TLS references should always be enclosed in UNSPEC. */
5864 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5866 if (GET_CODE (disp) == SYMBOL_REF
5867 && ix86_cmodel == CM_SMALL_PIC
5868 && SYMBOL_REF_LOCAL_P (disp))
5870 if (GET_CODE (disp) == LABEL_REF)
5872 if (GET_CODE (disp) == CONST
5873 && GET_CODE (XEXP (disp, 0)) == PLUS)
5875 rtx op0 = XEXP (XEXP (disp, 0), 0);
5876 rtx op1 = XEXP (XEXP (disp, 0), 1);
5878 /* TLS references should always be enclosed in UNSPEC. */
5879 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5881 if (((GET_CODE (op0) == SYMBOL_REF
5882 && ix86_cmodel == CM_SMALL_PIC
5883 && SYMBOL_REF_LOCAL_P (op0))
5884 || GET_CODE (op0) == LABEL_REF)
5885 && GET_CODE (op1) == CONST_INT
5886 && INTVAL (op1) < 16*1024*1024
5887 && INTVAL (op1) >= -16*1024*1024)
5891 if (GET_CODE (disp) != CONST)
5893 disp = XEXP (disp, 0);
5897 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5898 of GOT tables. We should not need these anyway. */
5899 if (GET_CODE (disp) != UNSPEC
5900 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5903 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5904 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5910 if (GET_CODE (disp) == PLUS)
5912 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5914 disp = XEXP (disp, 0);
5918 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5919 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5921 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5922 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5923 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5925 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5926 if (! strcmp (sym_name, "<pic base>"))
5931 if (GET_CODE (disp) != UNSPEC)
5934 switch (XINT (disp, 1))
5939 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5941 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5942 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5943 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5945 case UNSPEC_GOTTPOFF:
5946 case UNSPEC_GOTNTPOFF:
5947 case UNSPEC_INDNTPOFF:
5950 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5952 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5954 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5960 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5961 memory address for an instruction. The MODE argument is the machine mode
5962 for the MEM expression that wants to use this address.
5964 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5965 convert common non-canonical forms to canonical form so that they will
5969 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5971 struct ix86_address parts;
5972 rtx base, index, disp;
5973 HOST_WIDE_INT scale;
5974 const char *reason = NULL;
5975 rtx reason_rtx = NULL_RTX;
5977 if (TARGET_DEBUG_ADDR)
5980 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5981 GET_MODE_NAME (mode), strict);
5985 if (ix86_decompose_address (addr, &parts) <= 0)
5987 reason = "decomposition failed";
5992 index = parts.index;
5994 scale = parts.scale;
5996 /* Validate base register.
5998 Don't allow SUBREG's here, it can lead to spill failures when the base
5999 is one word out of a two word structure, which is represented internally
6006 if (GET_CODE (base) != REG)
6008 reason = "base is not a register";
6012 if (GET_MODE (base) != Pmode)
6014 reason = "base is not in Pmode";
6018 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6019 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6021 reason = "base is not valid";
6026 /* Validate index register.
6028 Don't allow SUBREG's here, it can lead to spill failures when the index
6029 is one word out of a two word structure, which is represented internally
6036 if (GET_CODE (index) != REG)
6038 reason = "index is not a register";
6042 if (GET_MODE (index) != Pmode)
6044 reason = "index is not in Pmode";
6048 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6049 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6051 reason = "index is not valid";
6056 /* Validate scale factor. */
6059 reason_rtx = GEN_INT (scale);
6062 reason = "scale without index";
6066 if (scale != 2 && scale != 4 && scale != 8)
6068 reason = "scale is not a valid multiplier";
6073 /* Validate displacement. */
6078 if (GET_CODE (disp) == CONST
6079 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6080 switch (XINT (XEXP (disp, 0), 1))
6084 case UNSPEC_GOTPCREL:
6087 goto is_legitimate_pic;
6089 case UNSPEC_GOTTPOFF:
6090 case UNSPEC_GOTNTPOFF:
6091 case UNSPEC_INDNTPOFF:
6097 reason = "invalid address unspec";
6101 else if (flag_pic && (SYMBOLIC_CONST (disp)
6103 && !machopic_operand_p (disp)
6108 if (TARGET_64BIT && (index || base))
6110 /* foo@dtpoff(%rX) is ok. */
6111 if (GET_CODE (disp) != CONST
6112 || GET_CODE (XEXP (disp, 0)) != PLUS
6113 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6114 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6115 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6116 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6118 reason = "non-constant pic memory reference";
6122 else if (! legitimate_pic_address_disp_p (disp))
6124 reason = "displacement is an invalid pic construct";
6128 /* This code used to verify that a symbolic pic displacement
6129 includes the pic_offset_table_rtx register.
6131 While this is good idea, unfortunately these constructs may
6132 be created by "adds using lea" optimization for incorrect
6141 This code is nonsensical, but results in addressing
6142 GOT table with pic_offset_table_rtx base. We can't
6143 just refuse it easily, since it gets matched by
6144 "addsi3" pattern, that later gets split to lea in the
6145 case output register differs from input. While this
6146 can be handled by separate addsi pattern for this case
6147 that never results in lea, this seems to be easier and
6148 correct fix for crash to disable this test. */
6150 else if (GET_CODE (disp) != LABEL_REF
6151 && GET_CODE (disp) != CONST_INT
6152 && (GET_CODE (disp) != CONST
6153 || !legitimate_constant_p (disp))
6154 && (GET_CODE (disp) != SYMBOL_REF
6155 || !legitimate_constant_p (disp)))
6157 reason = "displacement is not constant";
6160 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6162 reason = "displacement is out of range";
6167 /* Everything looks valid. */
6168 if (TARGET_DEBUG_ADDR)
6169 fprintf (stderr, "Success.\n");
6173 if (TARGET_DEBUG_ADDR)
6175 fprintf (stderr, "Error: %s\n", reason);
6176 debug_rtx (reason_rtx);
6181 /* Return an unique alias set for the GOT. */
6183 static HOST_WIDE_INT
6184 ix86_GOT_alias_set (void)
6186 static HOST_WIDE_INT set = -1;
6188 set = new_alias_set ();
6192 /* Return a legitimate reference for ORIG (an address) using the
6193 register REG. If REG is 0, a new pseudo is generated.
6195 There are two types of references that must be handled:
6197 1. Global data references must load the address from the GOT, via
6198 the PIC reg. An insn is emitted to do this load, and the reg is
6201 2. Static data references, constant pool addresses, and code labels
6202 compute the address as an offset from the GOT, whose base is in
6203 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6204 differentiate them from global data objects. The returned
6205 address is the PIC reg + an unspec constant.
6207 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6208 reg also appears in the address. */
6211 legitimize_pic_address (rtx orig, rtx reg)
6219 reg = gen_reg_rtx (Pmode);
6220 /* Use the generic Mach-O PIC machinery. */
6221 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6224 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6226 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6228 /* This symbol may be referenced via a displacement from the PIC
6229 base address (@GOTOFF). */
6231 if (reload_in_progress)
6232 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6233 if (GET_CODE (addr) == CONST)
6234 addr = XEXP (addr, 0);
6235 if (GET_CODE (addr) == PLUS)
6237 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6238 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6241 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6242 new = gen_rtx_CONST (Pmode, new);
6243 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6247 emit_move_insn (reg, new);
6251 else if (GET_CODE (addr) == SYMBOL_REF)
6255 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6256 new = gen_rtx_CONST (Pmode, new);
6257 new = gen_rtx_MEM (Pmode, new);
6258 RTX_UNCHANGING_P (new) = 1;
6259 set_mem_alias_set (new, ix86_GOT_alias_set ());
6262 reg = gen_reg_rtx (Pmode);
6263 /* Use directly gen_movsi, otherwise the address is loaded
6264 into register for CSE. We don't want to CSE this addresses,
6265 instead we CSE addresses from the GOT table, so skip this. */
6266 emit_insn (gen_movsi (reg, new));
6271 /* This symbol must be referenced via a load from the
6272 Global Offset Table (@GOT). */
6274 if (reload_in_progress)
6275 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6276 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6277 new = gen_rtx_CONST (Pmode, new);
6278 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6279 new = gen_rtx_MEM (Pmode, new);
6280 RTX_UNCHANGING_P (new) = 1;
6281 set_mem_alias_set (new, ix86_GOT_alias_set ());
6284 reg = gen_reg_rtx (Pmode);
6285 emit_move_insn (reg, new);
6291 if (GET_CODE (addr) == CONST)
6293 addr = XEXP (addr, 0);
6295 /* We must match stuff we generate before. Assume the only
6296 unspecs that can get here are ours. Not that we could do
6297 anything with them anyway.... */
6298 if (GET_CODE (addr) == UNSPEC
6299 || (GET_CODE (addr) == PLUS
6300 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6302 if (GET_CODE (addr) != PLUS)
6305 if (GET_CODE (addr) == PLUS)
6307 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6309 /* Check first to see if this is a constant offset from a @GOTOFF
6310 symbol reference. */
6311 if (local_symbolic_operand (op0, Pmode)
6312 && GET_CODE (op1) == CONST_INT)
6316 if (reload_in_progress)
6317 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6318 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6320 new = gen_rtx_PLUS (Pmode, new, op1);
6321 new = gen_rtx_CONST (Pmode, new);
6322 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6326 emit_move_insn (reg, new);
6332 if (INTVAL (op1) < -16*1024*1024
6333 || INTVAL (op1) >= 16*1024*1024)
6334 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6339 base = legitimize_pic_address (XEXP (addr, 0), reg);
6340 new = legitimize_pic_address (XEXP (addr, 1),
6341 base == reg ? NULL_RTX : reg);
6343 if (GET_CODE (new) == CONST_INT)
6344 new = plus_constant (base, INTVAL (new));
6347 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6349 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6350 new = XEXP (new, 1);
6352 new = gen_rtx_PLUS (Pmode, base, new);
6360 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6363 get_thread_pointer (int to_reg)
6367 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6371 reg = gen_reg_rtx (Pmode);
6372 insn = gen_rtx_SET (VOIDmode, reg, tp);
6373 insn = emit_insn (insn);
6378 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6379 false if we expect this to be used for a memory address and true if
6380 we expect to load the address into a register. */
6383 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6385 rtx dest, base, off, pic;
6390 case TLS_MODEL_GLOBAL_DYNAMIC:
6391 dest = gen_reg_rtx (Pmode);
6394 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6397 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6398 insns = get_insns ();
6401 emit_libcall_block (insns, dest, rax, x);
6404 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6407 case TLS_MODEL_LOCAL_DYNAMIC:
6408 base = gen_reg_rtx (Pmode);
6411 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6414 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6415 insns = get_insns ();
6418 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6419 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6420 emit_libcall_block (insns, base, rax, note);
6423 emit_insn (gen_tls_local_dynamic_base_32 (base));
6425 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6426 off = gen_rtx_CONST (Pmode, off);
6428 return gen_rtx_PLUS (Pmode, base, off);
6430 case TLS_MODEL_INITIAL_EXEC:
6434 type = UNSPEC_GOTNTPOFF;
6438 if (reload_in_progress)
6439 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6440 pic = pic_offset_table_rtx;
6441 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6443 else if (!TARGET_GNU_TLS)
6445 pic = gen_reg_rtx (Pmode);
6446 emit_insn (gen_set_got (pic));
6447 type = UNSPEC_GOTTPOFF;
6452 type = UNSPEC_INDNTPOFF;
6455 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6456 off = gen_rtx_CONST (Pmode, off);
6458 off = gen_rtx_PLUS (Pmode, pic, off);
6459 off = gen_rtx_MEM (Pmode, off);
6460 RTX_UNCHANGING_P (off) = 1;
6461 set_mem_alias_set (off, ix86_GOT_alias_set ());
6463 if (TARGET_64BIT || TARGET_GNU_TLS)
6465 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6466 off = force_reg (Pmode, off);
6467 return gen_rtx_PLUS (Pmode, base, off);
6471 base = get_thread_pointer (true);
6472 dest = gen_reg_rtx (Pmode);
6473 emit_insn (gen_subsi3 (dest, base, off));
6477 case TLS_MODEL_LOCAL_EXEC:
6478 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6479 (TARGET_64BIT || TARGET_GNU_TLS)
6480 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6481 off = gen_rtx_CONST (Pmode, off);
6483 if (TARGET_64BIT || TARGET_GNU_TLS)
6485 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6486 return gen_rtx_PLUS (Pmode, base, off);
6490 base = get_thread_pointer (true);
6491 dest = gen_reg_rtx (Pmode);
6492 emit_insn (gen_subsi3 (dest, base, off));
6503 /* Try machine-dependent ways of modifying an illegitimate address
6504 to be legitimate. If we find one, return the new, valid address.
6505 This macro is used in only one place: `memory_address' in explow.c.
6507 OLDX is the address as it was before break_out_memory_refs was called.
6508 In some cases it is useful to look at this to decide what needs to be done.
6510 MODE and WIN are passed so that this macro can use
6511 GO_IF_LEGITIMATE_ADDRESS.
6513 It is always safe for this macro to do nothing. It exists to recognize
6514 opportunities to optimize the output.
6516 For the 80386, we handle X+REG by loading X into a register R and
6517 using R+REG. R will go in a general reg and indexing will be used.
6518 However, if REG is a broken-out memory address or multiplication,
6519 nothing needs to be done because REG can certainly go in a general reg.
6521 When -fpic is used, special handling is needed for symbolic references.
6522 See comments by legitimize_pic_address in i386.c for details. */
6525 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6530 if (TARGET_DEBUG_ADDR)
6532 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6533 GET_MODE_NAME (mode));
6537 log = tls_symbolic_operand (x, mode);
6539 return legitimize_tls_address (x, log, false);
6541 if (flag_pic && SYMBOLIC_CONST (x))
6542 return legitimize_pic_address (x, 0);
6544 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6545 if (GET_CODE (x) == ASHIFT
6546 && GET_CODE (XEXP (x, 1)) == CONST_INT
6547 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6550 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6551 GEN_INT (1 << log));
6554 if (GET_CODE (x) == PLUS)
6556 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6558 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6559 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6560 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6563 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6564 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6565 GEN_INT (1 << log));
6568 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6569 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6570 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6573 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6574 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6575 GEN_INT (1 << log));
6578 /* Put multiply first if it isn't already. */
6579 if (GET_CODE (XEXP (x, 1)) == MULT)
6581 rtx tmp = XEXP (x, 0);
6582 XEXP (x, 0) = XEXP (x, 1);
6587 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6588 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6589 created by virtual register instantiation, register elimination, and
6590 similar optimizations. */
6591 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6594 x = gen_rtx_PLUS (Pmode,
6595 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6596 XEXP (XEXP (x, 1), 0)),
6597 XEXP (XEXP (x, 1), 1));
6601 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6602 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6603 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6604 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6605 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6606 && CONSTANT_P (XEXP (x, 1)))
6609 rtx other = NULL_RTX;
6611 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6613 constant = XEXP (x, 1);
6614 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6616 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6618 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6619 other = XEXP (x, 1);
6627 x = gen_rtx_PLUS (Pmode,
6628 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6629 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6630 plus_constant (other, INTVAL (constant)));
6634 if (changed && legitimate_address_p (mode, x, FALSE))
6637 if (GET_CODE (XEXP (x, 0)) == MULT)
6640 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6643 if (GET_CODE (XEXP (x, 1)) == MULT)
6646 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6650 && GET_CODE (XEXP (x, 1)) == REG
6651 && GET_CODE (XEXP (x, 0)) == REG)
6654 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6657 x = legitimize_pic_address (x, 0);
6660 if (changed && legitimate_address_p (mode, x, FALSE))
6663 if (GET_CODE (XEXP (x, 0)) == REG)
6665 rtx temp = gen_reg_rtx (Pmode);
6666 rtx val = force_operand (XEXP (x, 1), temp);
6668 emit_move_insn (temp, val);
6674 else if (GET_CODE (XEXP (x, 1)) == REG)
6676 rtx temp = gen_reg_rtx (Pmode);
6677 rtx val = force_operand (XEXP (x, 0), temp);
6679 emit_move_insn (temp, val);
6689 /* Print an integer constant expression in assembler syntax. Addition
6690 and subtraction are the only arithmetic that may appear in these
6691 expressions. FILE is the stdio stream to write to, X is the rtx, and
6692 CODE is the operand print code from the output string. */
6695 output_pic_addr_const (FILE *file, rtx x, int code)
6699 switch (GET_CODE (x))
6709 assemble_name (file, XSTR (x, 0));
6710 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6711 fputs ("@PLT", file);
6718 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6719 assemble_name (asm_out_file, buf);
6723 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6727 /* This used to output parentheses around the expression,
6728 but that does not work on the 386 (either ATT or BSD assembler). */
6729 output_pic_addr_const (file, XEXP (x, 0), code);
6733 if (GET_MODE (x) == VOIDmode)
6735 /* We can use %d if the number is <32 bits and positive. */
6736 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6737 fprintf (file, "0x%lx%08lx",
6738 (unsigned long) CONST_DOUBLE_HIGH (x),
6739 (unsigned long) CONST_DOUBLE_LOW (x));
6741 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6744 /* We can't handle floating point constants;
6745 PRINT_OPERAND must handle them. */
6746 output_operand_lossage ("floating constant misused");
6750 /* Some assemblers need integer constants to appear first. */
6751 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6753 output_pic_addr_const (file, XEXP (x, 0), code);
6755 output_pic_addr_const (file, XEXP (x, 1), code);
6757 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6759 output_pic_addr_const (file, XEXP (x, 1), code);
6761 output_pic_addr_const (file, XEXP (x, 0), code);
6769 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6770 output_pic_addr_const (file, XEXP (x, 0), code);
6772 output_pic_addr_const (file, XEXP (x, 1), code);
6774 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6778 if (XVECLEN (x, 0) != 1)
6780 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6781 switch (XINT (x, 1))
6784 fputs ("@GOT", file);
6787 fputs ("@GOTOFF", file);
6789 case UNSPEC_GOTPCREL:
6790 fputs ("@GOTPCREL(%rip)", file);
6792 case UNSPEC_GOTTPOFF:
6793 /* FIXME: This might be @TPOFF in Sun ld too. */
6794 fputs ("@GOTTPOFF", file);
6797 fputs ("@TPOFF", file);
6801 fputs ("@TPOFF", file);
6803 fputs ("@NTPOFF", file);
6806 fputs ("@DTPOFF", file);
6808 case UNSPEC_GOTNTPOFF:
6810 fputs ("@GOTTPOFF(%rip)", file);
6812 fputs ("@GOTNTPOFF", file);
6814 case UNSPEC_INDNTPOFF:
6815 fputs ("@INDNTPOFF", file);
6818 output_operand_lossage ("invalid UNSPEC as operand");
6824 output_operand_lossage ("invalid expression as operand");
6828 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6829 We need to handle our special PIC relocations. */
6832 i386_dwarf_output_addr_const (FILE *file, rtx x)
6835 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6839 fprintf (file, "%s", ASM_LONG);
6842 output_pic_addr_const (file, x, '\0');
6844 output_addr_const (file, x);
6848 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6849 We need to emit DTP-relative relocations. */
6852 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6854 fputs (ASM_LONG, file);
6855 output_addr_const (file, x);
6856 fputs ("@DTPOFF", file);
6862 fputs (", 0", file);
6869 /* In the name of slightly smaller debug output, and to cater to
6870 general assembler losage, recognize PIC+GOTOFF and turn it back
6871 into a direct symbol reference. */
6874 ix86_delegitimize_address (rtx orig_x)
6878 if (GET_CODE (x) == MEM)
6883 if (GET_CODE (x) != CONST
6884 || GET_CODE (XEXP (x, 0)) != UNSPEC
6885 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6886 || GET_CODE (orig_x) != MEM)
6888 return XVECEXP (XEXP (x, 0), 0, 0);
6891 if (GET_CODE (x) != PLUS
6892 || GET_CODE (XEXP (x, 1)) != CONST)
6895 if (GET_CODE (XEXP (x, 0)) == REG
6896 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6897 /* %ebx + GOT/GOTOFF */
6899 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6901 /* %ebx + %reg * scale + GOT/GOTOFF */
6903 if (GET_CODE (XEXP (y, 0)) == REG
6904 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6906 else if (GET_CODE (XEXP (y, 1)) == REG
6907 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6911 if (GET_CODE (y) != REG
6912 && GET_CODE (y) != MULT
6913 && GET_CODE (y) != ASHIFT)
6919 x = XEXP (XEXP (x, 1), 0);
6920 if (GET_CODE (x) == UNSPEC
6921 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6922 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6925 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6926 return XVECEXP (x, 0, 0);
6929 if (GET_CODE (x) == PLUS
6930 && GET_CODE (XEXP (x, 0)) == UNSPEC
6931 && GET_CODE (XEXP (x, 1)) == CONST_INT
6932 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6933 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6934 && GET_CODE (orig_x) != MEM)))
6936 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6938 return gen_rtx_PLUS (Pmode, y, x);
6946 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6951 if (mode == CCFPmode || mode == CCFPUmode)
6953 enum rtx_code second_code, bypass_code;
6954 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6955 if (bypass_code != NIL || second_code != NIL)
6957 code = ix86_fp_compare_code_to_integer (code);
6961 code = reverse_condition (code);
6972 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6977 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6978 Those same assemblers have the same but opposite losage on cmov. */
6981 suffix = fp ? "nbe" : "a";
6984 if (mode == CCNOmode || mode == CCGOCmode)
6986 else if (mode == CCmode || mode == CCGCmode)
6997 if (mode == CCNOmode || mode == CCGOCmode)
6999 else if (mode == CCmode || mode == CCGCmode)
7008 suffix = fp ? "nb" : "ae";
7011 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7021 suffix = fp ? "u" : "p";
7024 suffix = fp ? "nu" : "np";
7029 fputs (suffix, file);
7032 /* Print the name of register X to FILE based on its machine mode and number.
7033 If CODE is 'w', pretend the mode is HImode.
7034 If CODE is 'b', pretend the mode is QImode.
7035 If CODE is 'k', pretend the mode is SImode.
7036 If CODE is 'q', pretend the mode is DImode.
7037 If CODE is 'h', pretend the reg is the `high' byte register.
7038 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7041 print_reg (rtx x, int code, FILE *file)
7043 if (REGNO (x) == ARG_POINTER_REGNUM
7044 || REGNO (x) == FRAME_POINTER_REGNUM
7045 || REGNO (x) == FLAGS_REG
7046 || REGNO (x) == FPSR_REG)
7049 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7052 if (code == 'w' || MMX_REG_P (x))
7054 else if (code == 'b')
7056 else if (code == 'k')
7058 else if (code == 'q')
7060 else if (code == 'y')
7062 else if (code == 'h')
7065 code = GET_MODE_SIZE (GET_MODE (x));
7067 /* Irritatingly, AMD extended registers use different naming convention
7068 from the normal registers. */
7069 if (REX_INT_REG_P (x))
7076 error ("extended registers have no high halves");
7079 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7082 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7085 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7088 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7091 error ("unsupported operand size for extended register");
7099 if (STACK_TOP_P (x))
7101 fputs ("st(0)", file);
7108 if (! ANY_FP_REG_P (x))
7109 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7114 fputs (hi_reg_name[REGNO (x)], file);
7117 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7119 fputs (qi_reg_name[REGNO (x)], file);
7122 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7124 fputs (qi_high_reg_name[REGNO (x)], file);
7131 /* Locate some local-dynamic symbol still in use by this function
7132 so that we can print its name in some tls_local_dynamic_base
7136 get_some_local_dynamic_name (void)
7140 if (cfun->machine->some_ld_name)
7141 return cfun->machine->some_ld_name;
7143 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7145 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7146 return cfun->machine->some_ld_name;
7152 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7156 if (GET_CODE (x) == SYMBOL_REF
7157 && local_dynamic_symbolic_operand (x, Pmode))
7159 cfun->machine->some_ld_name = XSTR (x, 0);
7167 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7168 C -- print opcode suffix for set/cmov insn.
7169 c -- like C, but print reversed condition
7170 F,f -- likewise, but for floating-point.
7171 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7173 R -- print the prefix for register names.
7174 z -- print the opcode suffix for the size of the current operand.
7175 * -- print a star (in certain assembler syntax)
7176 A -- print an absolute memory reference.
7177 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7178 s -- print a shift double count, followed by the assemblers argument
7180 b -- print the QImode name of the register for the indicated operand.
7181 %b0 would print %al if operands[0] is reg 0.
7182 w -- likewise, print the HImode name of the register.
7183 k -- likewise, print the SImode name of the register.
7184 q -- likewise, print the DImode name of the register.
7185 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7186 y -- print "st(0)" instead of "st" as a register.
7187 D -- print condition for SSE cmp instruction.
7188 P -- if PIC, print an @PLT suffix.
7189 X -- don't print any sort of PIC '@' suffix for a symbol.
7190 & -- print some in-use local-dynamic symbol name.
7194 print_operand (FILE *file, rtx x, int code)
7201 if (ASSEMBLER_DIALECT == ASM_ATT)
7206 assemble_name (file, get_some_local_dynamic_name ());
7210 if (ASSEMBLER_DIALECT == ASM_ATT)
7212 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7214 /* Intel syntax. For absolute addresses, registers should not
7215 be surrounded by braces. */
7216 if (GET_CODE (x) != REG)
7219 PRINT_OPERAND (file, x, 0);
7227 PRINT_OPERAND (file, x, 0);
7232 if (ASSEMBLER_DIALECT == ASM_ATT)
7237 if (ASSEMBLER_DIALECT == ASM_ATT)
7242 if (ASSEMBLER_DIALECT == ASM_ATT)
7247 if (ASSEMBLER_DIALECT == ASM_ATT)
7252 if (ASSEMBLER_DIALECT == ASM_ATT)
7257 if (ASSEMBLER_DIALECT == ASM_ATT)
7262 /* 387 opcodes don't get size suffixes if the operands are
7264 if (STACK_REG_P (x))
7267 /* Likewise if using Intel opcodes. */
7268 if (ASSEMBLER_DIALECT == ASM_INTEL)
7271 /* This is the size of op from size of operand. */
7272 switch (GET_MODE_SIZE (GET_MODE (x)))
7275 #ifdef HAVE_GAS_FILDS_FISTS
7281 if (GET_MODE (x) == SFmode)
7296 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7298 #ifdef GAS_MNEMONICS
7324 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7326 PRINT_OPERAND (file, x, 0);
7332 /* Little bit of braindamage here. The SSE compare instructions
7333 does use completely different names for the comparisons that the
7334 fp conditional moves. */
7335 switch (GET_CODE (x))
7350 fputs ("unord", file);
7354 fputs ("neq", file);
7358 fputs ("nlt", file);
7362 fputs ("nle", file);
7365 fputs ("ord", file);
7373 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7374 if (ASSEMBLER_DIALECT == ASM_ATT)
7376 switch (GET_MODE (x))
7378 case HImode: putc ('w', file); break;
7380 case SFmode: putc ('l', file); break;
7382 case DFmode: putc ('q', file); break;
7390 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7393 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7394 if (ASSEMBLER_DIALECT == ASM_ATT)
7397 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7400 /* Like above, but reverse condition */
7402 /* Check to see if argument to %c is really a constant
7403 and not a condition code which needs to be reversed. */
7404 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7406 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7409 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7412 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7413 if (ASSEMBLER_DIALECT == ASM_ATT)
7416 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7422 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7425 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7428 int pred_val = INTVAL (XEXP (x, 0));
7430 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7431 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7433 int taken = pred_val > REG_BR_PROB_BASE / 2;
7434 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7436 /* Emit hints only in the case default branch prediction
7437 heuristics would fail. */
7438 if (taken != cputaken)
7440 /* We use 3e (DS) prefix for taken branches and
7441 2e (CS) prefix for not taken branches. */
7443 fputs ("ds ; ", file);
7445 fputs ("cs ; ", file);
7452 output_operand_lossage ("invalid operand code `%c'", code);
7456 if (GET_CODE (x) == REG)
7457 print_reg (x, code, file);
7459 else if (GET_CODE (x) == MEM)
7461 /* No `byte ptr' prefix for call instructions. */
7462 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7465 switch (GET_MODE_SIZE (GET_MODE (x)))
7467 case 1: size = "BYTE"; break;
7468 case 2: size = "WORD"; break;
7469 case 4: size = "DWORD"; break;
7470 case 8: size = "QWORD"; break;
7471 case 12: size = "XWORD"; break;
7472 case 16: size = "XMMWORD"; break;
7477 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7480 else if (code == 'w')
7482 else if (code == 'k')
7486 fputs (" PTR ", file);
7490 /* Avoid (%rip) for call operands. */
7491 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7492 && GET_CODE (x) != CONST_INT)
7493 output_addr_const (file, x);
7494 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7495 output_operand_lossage ("invalid constraints for operand");
7500 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7505 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7506 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7508 if (ASSEMBLER_DIALECT == ASM_ATT)
7510 fprintf (file, "0x%08lx", l);
7513 /* These float cases don't actually occur as immediate operands. */
7514 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7518 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7519 fprintf (file, "%s", dstr);
7522 else if (GET_CODE (x) == CONST_DOUBLE
7523 && GET_MODE (x) == XFmode)
7527 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7528 fprintf (file, "%s", dstr);
7535 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7537 if (ASSEMBLER_DIALECT == ASM_ATT)
7540 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7541 || GET_CODE (x) == LABEL_REF)
7543 if (ASSEMBLER_DIALECT == ASM_ATT)
7546 fputs ("OFFSET FLAT:", file);
7549 if (GET_CODE (x) == CONST_INT)
7550 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7552 output_pic_addr_const (file, x, code);
7554 output_addr_const (file, x);
7558 /* Print a memory operand whose address is ADDR. */
7561 print_operand_address (FILE *file, rtx addr)
7563 struct ix86_address parts;
7564 rtx base, index, disp;
7567 if (! ix86_decompose_address (addr, &parts))
7571 index = parts.index;
7573 scale = parts.scale;
7581 if (USER_LABEL_PREFIX[0] == 0)
7583 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7589 if (!base && !index)
7591 /* Displacement only requires special attention. */
7593 if (GET_CODE (disp) == CONST_INT)
7595 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7597 if (USER_LABEL_PREFIX[0] == 0)
7599 fputs ("ds:", file);
7601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7604 output_pic_addr_const (file, disp, 0);
7606 output_addr_const (file, disp);
7608 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7610 && ((GET_CODE (disp) == SYMBOL_REF
7611 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7612 || GET_CODE (disp) == LABEL_REF
7613 || (GET_CODE (disp) == CONST
7614 && GET_CODE (XEXP (disp, 0)) == PLUS
7615 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7616 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7617 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7618 fputs ("(%rip)", file);
7622 if (ASSEMBLER_DIALECT == ASM_ATT)
7627 output_pic_addr_const (file, disp, 0);
7628 else if (GET_CODE (disp) == LABEL_REF)
7629 output_asm_label (disp);
7631 output_addr_const (file, disp);
7636 print_reg (base, 0, file);
7640 print_reg (index, 0, file);
7642 fprintf (file, ",%d", scale);
7648 rtx offset = NULL_RTX;
7652 /* Pull out the offset of a symbol; print any symbol itself. */
7653 if (GET_CODE (disp) == CONST
7654 && GET_CODE (XEXP (disp, 0)) == PLUS
7655 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7657 offset = XEXP (XEXP (disp, 0), 1);
7658 disp = gen_rtx_CONST (VOIDmode,
7659 XEXP (XEXP (disp, 0), 0));
7663 output_pic_addr_const (file, disp, 0);
7664 else if (GET_CODE (disp) == LABEL_REF)
7665 output_asm_label (disp);
7666 else if (GET_CODE (disp) == CONST_INT)
7669 output_addr_const (file, disp);
7675 print_reg (base, 0, file);
7678 if (INTVAL (offset) >= 0)
7680 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7684 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7691 print_reg (index, 0, file);
7693 fprintf (file, "*%d", scale);
7701 output_addr_const_extra (FILE *file, rtx x)
7705 if (GET_CODE (x) != UNSPEC)
7708 op = XVECEXP (x, 0, 0);
7709 switch (XINT (x, 1))
7711 case UNSPEC_GOTTPOFF:
7712 output_addr_const (file, op);
7713 /* FIXME: This might be @TPOFF in Sun ld. */
7714 fputs ("@GOTTPOFF", file);
7717 output_addr_const (file, op);
7718 fputs ("@TPOFF", file);
7721 output_addr_const (file, op);
7723 fputs ("@TPOFF", file);
7725 fputs ("@NTPOFF", file);
7728 output_addr_const (file, op);
7729 fputs ("@DTPOFF", file);
7731 case UNSPEC_GOTNTPOFF:
7732 output_addr_const (file, op);
7734 fputs ("@GOTTPOFF(%rip)", file);
7736 fputs ("@GOTNTPOFF", file);
7738 case UNSPEC_INDNTPOFF:
7739 output_addr_const (file, op);
7740 fputs ("@INDNTPOFF", file);
7750 /* Split one or more DImode RTL references into pairs of SImode
7751 references. The RTL can be REG, offsettable MEM, integer constant, or
7752 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7753 split and "num" is its length. lo_half and hi_half are output arrays
7754 that parallel "operands". */
7757 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7761 rtx op = operands[num];
7763 /* simplify_subreg refuse to split volatile memory addresses,
7764 but we still have to handle it. */
7765 if (GET_CODE (op) == MEM)
7767 lo_half[num] = adjust_address (op, SImode, 0);
7768 hi_half[num] = adjust_address (op, SImode, 4);
7772 lo_half[num] = simplify_gen_subreg (SImode, op,
7773 GET_MODE (op) == VOIDmode
7774 ? DImode : GET_MODE (op), 0);
7775 hi_half[num] = simplify_gen_subreg (SImode, op,
7776 GET_MODE (op) == VOIDmode
7777 ? DImode : GET_MODE (op), 4);
7781 /* Split one or more TImode RTL references into pairs of SImode
7782 references. The RTL can be REG, offsettable MEM, integer constant, or
7783 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7784 split and "num" is its length. lo_half and hi_half are output arrays
7785 that parallel "operands". */
7788 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7792 rtx op = operands[num];
7794 /* simplify_subreg refuse to split volatile memory addresses, but we
7795 still have to handle it. */
7796 if (GET_CODE (op) == MEM)
7798 lo_half[num] = adjust_address (op, DImode, 0);
7799 hi_half[num] = adjust_address (op, DImode, 8);
7803 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7804 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7809 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7810 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7811 is the expression of the binary operation. The output may either be
7812 emitted here, or returned to the caller, like all output_* functions.
7814 There is no guarantee that the operands are the same mode, as they
7815 might be within FLOAT or FLOAT_EXTEND expressions. */
7817 #ifndef SYSV386_COMPAT
7818 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7819 wants to fix the assemblers because that causes incompatibility
7820 with gcc. No-one wants to fix gcc because that causes
7821 incompatibility with assemblers... You can use the option of
7822 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7823 #define SYSV386_COMPAT 1
7827 output_387_binary_op (rtx insn, rtx *operands)
7829 static char buf[30];
7832 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7834 #ifdef ENABLE_CHECKING
7835 /* Even if we do not want to check the inputs, this documents input
7836 constraints. Which helps in understanding the following code. */
7837 if (STACK_REG_P (operands[0])
7838 && ((REG_P (operands[1])
7839 && REGNO (operands[0]) == REGNO (operands[1])
7840 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7841 || (REG_P (operands[2])
7842 && REGNO (operands[0]) == REGNO (operands[2])
7843 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7844 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7850 switch (GET_CODE (operands[3]))
7853 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7854 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7862 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7863 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7871 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7872 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7880 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7881 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7895 if (GET_MODE (operands[0]) == SFmode)
7896 strcat (buf, "ss\t{%2, %0|%0, %2}");
7898 strcat (buf, "sd\t{%2, %0|%0, %2}");
7903 switch (GET_CODE (operands[3]))
7907 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7909 rtx temp = operands[2];
7910 operands[2] = operands[1];
7914 /* know operands[0] == operands[1]. */
7916 if (GET_CODE (operands[2]) == MEM)
7922 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7924 if (STACK_TOP_P (operands[0]))
7925 /* How is it that we are storing to a dead operand[2]?
7926 Well, presumably operands[1] is dead too. We can't
7927 store the result to st(0) as st(0) gets popped on this
7928 instruction. Instead store to operands[2] (which I
7929 think has to be st(1)). st(1) will be popped later.
7930 gcc <= 2.8.1 didn't have this check and generated
7931 assembly code that the Unixware assembler rejected. */
7932 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7934 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7938 if (STACK_TOP_P (operands[0]))
7939 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7941 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7946 if (GET_CODE (operands[1]) == MEM)
7952 if (GET_CODE (operands[2]) == MEM)
7958 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7961 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7962 derived assemblers, confusingly reverse the direction of
7963 the operation for fsub{r} and fdiv{r} when the
7964 destination register is not st(0). The Intel assembler
7965 doesn't have this brain damage. Read !SYSV386_COMPAT to
7966 figure out what the hardware really does. */
7967 if (STACK_TOP_P (operands[0]))
7968 p = "{p\t%0, %2|rp\t%2, %0}";
7970 p = "{rp\t%2, %0|p\t%0, %2}";
7972 if (STACK_TOP_P (operands[0]))
7973 /* As above for fmul/fadd, we can't store to st(0). */
7974 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7976 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7981 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7984 if (STACK_TOP_P (operands[0]))
7985 p = "{rp\t%0, %1|p\t%1, %0}";
7987 p = "{p\t%1, %0|rp\t%0, %1}";
7989 if (STACK_TOP_P (operands[0]))
7990 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7992 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7997 if (STACK_TOP_P (operands[0]))
7999 if (STACK_TOP_P (operands[1]))
8000 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8002 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8005 else if (STACK_TOP_P (operands[1]))
8008 p = "{\t%1, %0|r\t%0, %1}";
8010 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8016 p = "{r\t%2, %0|\t%0, %2}";
8018 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8031 /* Output code to initialize control word copies used by
8032 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8033 is set to control word rounding downwards. */
8035 emit_i387_cw_initialization (rtx normal, rtx round_down)
8037 rtx reg = gen_reg_rtx (HImode);
8039 emit_insn (gen_x86_fnstcw_1 (normal));
8040 emit_move_insn (reg, normal);
8041 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8043 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8045 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8046 emit_move_insn (round_down, reg);
8049 /* Output code for INSN to convert a float to a signed int. OPERANDS
8050 are the insn operands. The output may be [HSD]Imode and the input
8051 operand may be [SDX]Fmode. */
8054 output_fix_trunc (rtx insn, rtx *operands)
8056 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8057 int dimode_p = GET_MODE (operands[0]) == DImode;
8059 /* Jump through a hoop or two for DImode, since the hardware has no
8060 non-popping instruction. We used to do this a different way, but
8061 that was somewhat fragile and broke with post-reload splitters. */
8062 if (dimode_p && !stack_top_dies)
8063 output_asm_insn ("fld\t%y1", operands);
8065 if (!STACK_TOP_P (operands[1]))
8068 if (GET_CODE (operands[0]) != MEM)
8071 output_asm_insn ("fldcw\t%3", operands);
8072 if (stack_top_dies || dimode_p)
8073 output_asm_insn ("fistp%z0\t%0", operands);
8075 output_asm_insn ("fist%z0\t%0", operands);
8076 output_asm_insn ("fldcw\t%2", operands);
8081 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8082 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8083 when fucom should be used. */
8086 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8089 rtx cmp_op0 = operands[0];
8090 rtx cmp_op1 = operands[1];
8091 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8096 cmp_op1 = operands[2];
8100 if (GET_MODE (operands[0]) == SFmode)
8102 return "ucomiss\t{%1, %0|%0, %1}";
8104 return "comiss\t{%1, %0|%0, %1}";
8107 return "ucomisd\t{%1, %0|%0, %1}";
8109 return "comisd\t{%1, %0|%0, %1}";
8112 if (! STACK_TOP_P (cmp_op0))
8115 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8117 if (STACK_REG_P (cmp_op1)
8119 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8120 && REGNO (cmp_op1) != FIRST_STACK_REG)
8122 /* If both the top of the 387 stack dies, and the other operand
8123 is also a stack register that dies, then this must be a
8124 `fcompp' float compare */
8128 /* There is no double popping fcomi variant. Fortunately,
8129 eflags is immune from the fstp's cc clobbering. */
8131 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8133 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8141 return "fucompp\n\tfnstsw\t%0";
8143 return "fcompp\n\tfnstsw\t%0";
8156 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8158 static const char * const alt[24] =
8170 "fcomi\t{%y1, %0|%0, %y1}",
8171 "fcomip\t{%y1, %0|%0, %y1}",
8172 "fucomi\t{%y1, %0|%0, %y1}",
8173 "fucomip\t{%y1, %0|%0, %y1}",
8180 "fcom%z2\t%y2\n\tfnstsw\t%0",
8181 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8182 "fucom%z2\t%y2\n\tfnstsw\t%0",
8183 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8185 "ficom%z2\t%y2\n\tfnstsw\t%0",
8186 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8194 mask = eflags_p << 3;
8195 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8196 mask |= unordered_p << 1;
8197 mask |= stack_top_dies;
8210 ix86_output_addr_vec_elt (FILE *file, int value)
8212 const char *directive = ASM_LONG;
8217 directive = ASM_QUAD;
8223 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8227 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8230 fprintf (file, "%s%s%d-%s%d\n",
8231 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8232 else if (HAVE_AS_GOTOFF_IN_DATA)
8233 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8235 else if (TARGET_MACHO)
8237 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8238 machopic_output_function_base_name (file);
8239 fprintf(file, "\n");
8243 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8244 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8247 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8251 ix86_expand_clear (rtx dest)
8255 /* We play register width games, which are only valid after reload. */
8256 if (!reload_completed)
8259 /* Avoid HImode and its attendant prefix byte. */
8260 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8261 dest = gen_rtx_REG (SImode, REGNO (dest));
8263 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8265 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8266 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8268 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8269 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8275 /* X is an unchanging MEM. If it is a constant pool reference, return
8276 the constant pool rtx, else NULL. */
8279 maybe_get_pool_constant (rtx x)
8281 x = ix86_delegitimize_address (XEXP (x, 0));
8283 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8284 return get_pool_constant (x);
8290 ix86_expand_move (enum machine_mode mode, rtx operands[])
8292 int strict = (reload_in_progress || reload_completed);
8294 enum tls_model model;
8299 model = tls_symbolic_operand (op1, Pmode);
8302 op1 = legitimize_tls_address (op1, model, true);
8303 op1 = force_operand (op1, op0);
8308 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8313 rtx temp = ((reload_in_progress
8314 || ((op0 && GET_CODE (op0) == REG)
8316 ? op0 : gen_reg_rtx (Pmode));
8317 op1 = machopic_indirect_data_reference (op1, temp);
8318 op1 = machopic_legitimize_pic_address (op1, mode,
8319 temp == op1 ? 0 : temp);
8321 else if (MACHOPIC_INDIRECT)
8322 op1 = machopic_indirect_data_reference (op1, 0);
8326 if (GET_CODE (op0) == MEM)
8327 op1 = force_reg (Pmode, op1);
8331 if (GET_CODE (temp) != REG)
8332 temp = gen_reg_rtx (Pmode);
8333 temp = legitimize_pic_address (op1, temp);
8338 #endif /* TARGET_MACHO */
8342 if (GET_CODE (op0) == MEM
8343 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8344 || !push_operand (op0, mode))
8345 && GET_CODE (op1) == MEM)
8346 op1 = force_reg (mode, op1);
8348 if (push_operand (op0, mode)
8349 && ! general_no_elim_operand (op1, mode))
8350 op1 = copy_to_mode_reg (mode, op1);
8352 /* Force large constants in 64bit compilation into register
8353 to get them CSEed. */
8354 if (TARGET_64BIT && mode == DImode
8355 && immediate_operand (op1, mode)
8356 && !x86_64_zero_extended_value (op1)
8357 && !register_operand (op0, mode)
8358 && optimize && !reload_completed && !reload_in_progress)
8359 op1 = copy_to_mode_reg (mode, op1);
8361 if (FLOAT_MODE_P (mode))
8363 /* If we are loading a floating point constant to a register,
8364 force the value to memory now, since we'll get better code
8365 out the back end. */
8369 else if (GET_CODE (op1) == CONST_DOUBLE)
8371 op1 = validize_mem (force_const_mem (mode, op1));
8372 if (!register_operand (op0, mode))
8374 rtx temp = gen_reg_rtx (mode);
8375 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8376 emit_move_insn (op0, temp);
8383 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8387 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8389 /* Force constants other than zero into memory. We do not know how
8390 the instructions used to build constants modify the upper 64 bits
8391 of the register, once we have that information we may be able
8392 to handle some of them more efficiently. */
8393 if ((reload_in_progress | reload_completed) == 0
8394 && register_operand (operands[0], mode)
8395 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8396 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8398 /* Make operand1 a register if it isn't already. */
8400 && !register_operand (operands[0], mode)
8401 && !register_operand (operands[1], mode))
8403 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8404 emit_move_insn (operands[0], temp);
8408 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8411 /* Attempt to expand a binary operator. Make the expansion closer to the
8412 actual machine, then just general_operand, which will allow 3 separate
8413 memory references (one output, two input) in a single insn. */
8416 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8419 int matching_memory;
8420 rtx src1, src2, dst, op, clob;
8426 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8427 if (GET_RTX_CLASS (code) == 'c'
8428 && (rtx_equal_p (dst, src2)
8429 || immediate_operand (src1, mode)))
8436 /* If the destination is memory, and we do not have matching source
8437 operands, do things in registers. */
8438 matching_memory = 0;
8439 if (GET_CODE (dst) == MEM)
8441 if (rtx_equal_p (dst, src1))
8442 matching_memory = 1;
8443 else if (GET_RTX_CLASS (code) == 'c'
8444 && rtx_equal_p (dst, src2))
8445 matching_memory = 2;
8447 dst = gen_reg_rtx (mode);
8450 /* Both source operands cannot be in memory. */
8451 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8453 if (matching_memory != 2)
8454 src2 = force_reg (mode, src2);
8456 src1 = force_reg (mode, src1);
8459 /* If the operation is not commutable, source 1 cannot be a constant
8460 or non-matching memory. */
8461 if ((CONSTANT_P (src1)
8462 || (!matching_memory && GET_CODE (src1) == MEM))
8463 && GET_RTX_CLASS (code) != 'c')
8464 src1 = force_reg (mode, src1);
8466 /* If optimizing, copy to regs to improve CSE */
8467 if (optimize && ! no_new_pseudos)
8469 if (GET_CODE (dst) == MEM)
8470 dst = gen_reg_rtx (mode);
8471 if (GET_CODE (src1) == MEM)
8472 src1 = force_reg (mode, src1);
8473 if (GET_CODE (src2) == MEM)
8474 src2 = force_reg (mode, src2);
8477 /* Emit the instruction. */
8479 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8480 if (reload_in_progress)
8482 /* Reload doesn't know about the flags register, and doesn't know that
8483 it doesn't want to clobber it. We can only do this with PLUS. */
8490 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8491 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8494 /* Fix up the destination if needed. */
8495 if (dst != operands[0])
8496 emit_move_insn (operands[0], dst);
8499 /* Return TRUE or FALSE depending on whether the binary operator meets the
8500 appropriate constraints. */
8503 ix86_binary_operator_ok (enum rtx_code code,
8504 enum machine_mode mode ATTRIBUTE_UNUSED,
8507 /* Both source operands cannot be in memory. */
8508 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8510 /* If the operation is not commutable, source 1 cannot be a constant. */
8511 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8513 /* If the destination is memory, we must have a matching source operand. */
8514 if (GET_CODE (operands[0]) == MEM
8515 && ! (rtx_equal_p (operands[0], operands[1])
8516 || (GET_RTX_CLASS (code) == 'c'
8517 && rtx_equal_p (operands[0], operands[2]))))
8519 /* If the operation is not commutable and the source 1 is memory, we must
8520 have a matching destination. */
8521 if (GET_CODE (operands[1]) == MEM
8522 && GET_RTX_CLASS (code) != 'c'
8523 && ! rtx_equal_p (operands[0], operands[1]))
8528 /* Attempt to expand a unary operator. Make the expansion closer to the
8529 actual machine, then just general_operand, which will allow 2 separate
8530 memory references (one output, one input) in a single insn. */
8533 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8536 int matching_memory;
8537 rtx src, dst, op, clob;
8542 /* If the destination is memory, and we do not have matching source
8543 operands, do things in registers. */
8544 matching_memory = 0;
8545 if (GET_CODE (dst) == MEM)
8547 if (rtx_equal_p (dst, src))
8548 matching_memory = 1;
8550 dst = gen_reg_rtx (mode);
8553 /* When source operand is memory, destination must match. */
8554 if (!matching_memory && GET_CODE (src) == MEM)
8555 src = force_reg (mode, src);
8557 /* If optimizing, copy to regs to improve CSE */
8558 if (optimize && ! no_new_pseudos)
8560 if (GET_CODE (dst) == MEM)
8561 dst = gen_reg_rtx (mode);
8562 if (GET_CODE (src) == MEM)
8563 src = force_reg (mode, src);
8566 /* Emit the instruction. */
8568 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8569 if (reload_in_progress || code == NOT)
8571 /* Reload doesn't know about the flags register, and doesn't know that
8572 it doesn't want to clobber it. */
8579 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8580 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8583 /* Fix up the destination if needed. */
8584 if (dst != operands[0])
8585 emit_move_insn (operands[0], dst);
8588 /* Return TRUE or FALSE depending on whether the unary operator meets the
8589 appropriate constraints. */
8592 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8593 enum machine_mode mode ATTRIBUTE_UNUSED,
8594 rtx operands[2] ATTRIBUTE_UNUSED)
8596 /* If one of operands is memory, source and destination must match. */
8597 if ((GET_CODE (operands[0]) == MEM
8598 || GET_CODE (operands[1]) == MEM)
8599 && ! rtx_equal_p (operands[0], operands[1]))
8604 /* Return TRUE or FALSE depending on whether the first SET in INSN
8605 has source and destination with matching CC modes, and that the
8606 CC mode is at least as constrained as REQ_MODE. */
8609 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8612 enum machine_mode set_mode;
8614 set = PATTERN (insn);
8615 if (GET_CODE (set) == PARALLEL)
8616 set = XVECEXP (set, 0, 0);
8617 if (GET_CODE (set) != SET)
8619 if (GET_CODE (SET_SRC (set)) != COMPARE)
8622 set_mode = GET_MODE (SET_DEST (set));
8626 if (req_mode != CCNOmode
8627 && (req_mode != CCmode
8628 || XEXP (SET_SRC (set), 1) != const0_rtx))
8632 if (req_mode == CCGCmode)
8636 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8640 if (req_mode == CCZmode)
8650 return (GET_MODE (SET_SRC (set)) == set_mode);
8653 /* Generate insn patterns to do an integer compare of OPERANDS. */
8656 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8658 enum machine_mode cmpmode;
8661 cmpmode = SELECT_CC_MODE (code, op0, op1);
8662 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8664 /* This is very simple, but making the interface the same as in the
8665 FP case makes the rest of the code easier. */
8666 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8667 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8669 /* Return the test that should be put into the flags user, i.e.
8670 the bcc, scc, or cmov instruction. */
8671 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8674 /* Figure out whether to use ordered or unordered fp comparisons.
8675 Return the appropriate mode to use. */
8678 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8680 /* ??? In order to make all comparisons reversible, we do all comparisons
8681 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8682 all forms trapping and nontrapping comparisons, we can make inequality
8683 comparisons trapping again, since it results in better code when using
8684 FCOM based compares. */
8685 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8689 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8691 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8692 return ix86_fp_compare_mode (code);
8695 /* Only zero flag is needed. */
8697 case NE: /* ZF!=0 */
8699 /* Codes needing carry flag. */
8700 case GEU: /* CF=0 */
8701 case GTU: /* CF=0 & ZF=0 */
8702 case LTU: /* CF=1 */
8703 case LEU: /* CF=1 | ZF=1 */
8705 /* Codes possibly doable only with sign flag when
8706 comparing against zero. */
8707 case GE: /* SF=OF or SF=0 */
8708 case LT: /* SF<>OF or SF=1 */
8709 if (op1 == const0_rtx)
8712 /* For other cases Carry flag is not required. */
8714 /* Codes doable only with sign flag when comparing
8715 against zero, but we miss jump instruction for it
8716 so we need to use relational tests against overflow
8717 that thus needs to be zero. */
8718 case GT: /* ZF=0 & SF=OF */
8719 case LE: /* ZF=1 | SF<>OF */
8720 if (op1 == const0_rtx)
8724 /* strcmp pattern do (use flags) and combine may ask us for proper
8733 /* Return the fixed registers used for condition codes. */
8736 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8743 /* If two condition code modes are compatible, return a condition code
8744 mode which is compatible with both. Otherwise, return
8747 static enum machine_mode
8748 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8753 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8756 if ((m1 == CCGCmode && m2 == CCGOCmode)
8757 || (m1 == CCGOCmode && m2 == CCGCmode))
8785 /* These are only compatible with themselves, which we already
8791 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8794 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8796 enum rtx_code swapped_code = swap_condition (code);
8797 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8798 || (ix86_fp_comparison_cost (swapped_code)
8799 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8802 /* Swap, force into registers, or otherwise massage the two operands
8803 to a fp comparison. The operands are updated in place; the new
8804 comparison code is returned. */
8806 static enum rtx_code
8807 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8809 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8810 rtx op0 = *pop0, op1 = *pop1;
8811 enum machine_mode op_mode = GET_MODE (op0);
8812 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8814 /* All of the unordered compare instructions only work on registers.
8815 The same is true of the XFmode compare instructions. The same is
8816 true of the fcomi compare instructions. */
8819 && (fpcmp_mode == CCFPUmode
8820 || op_mode == XFmode
8821 || ix86_use_fcomi_compare (code)))
8823 op0 = force_reg (op_mode, op0);
8824 op1 = force_reg (op_mode, op1);
8828 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8829 things around if they appear profitable, otherwise force op0
8832 if (standard_80387_constant_p (op0) == 0
8833 || (GET_CODE (op0) == MEM
8834 && ! (standard_80387_constant_p (op1) == 0
8835 || GET_CODE (op1) == MEM)))
8838 tmp = op0, op0 = op1, op1 = tmp;
8839 code = swap_condition (code);
8842 if (GET_CODE (op0) != REG)
8843 op0 = force_reg (op_mode, op0);
8845 if (CONSTANT_P (op1))
8847 if (standard_80387_constant_p (op1))
8848 op1 = force_reg (op_mode, op1);
8850 op1 = validize_mem (force_const_mem (op_mode, op1));
8854 /* Try to rearrange the comparison to make it cheaper. */
8855 if (ix86_fp_comparison_cost (code)
8856 > ix86_fp_comparison_cost (swap_condition (code))
8857 && (GET_CODE (op1) == REG || !no_new_pseudos))
8860 tmp = op0, op0 = op1, op1 = tmp;
8861 code = swap_condition (code);
8862 if (GET_CODE (op0) != REG)
8863 op0 = force_reg (op_mode, op0);
8871 /* Convert comparison codes we use to represent FP comparison to integer
8872 code that will result in proper branch. Return UNKNOWN if no such code
8874 static enum rtx_code
8875 ix86_fp_compare_code_to_integer (enum rtx_code code)
8904 /* Split comparison code CODE into comparisons we can do using branch
8905 instructions. BYPASS_CODE is comparison code for branch that will
8906 branch around FIRST_CODE and SECOND_CODE. If some of branches
8907 is not required, set value to NIL.
8908 We never require more than two branches. */
8910 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8911 enum rtx_code *first_code,
8912 enum rtx_code *second_code)
8918 /* The fcomi comparison sets flags as follows:
8928 case GT: /* GTU - CF=0 & ZF=0 */
8929 case GE: /* GEU - CF=0 */
8930 case ORDERED: /* PF=0 */
8931 case UNORDERED: /* PF=1 */
8932 case UNEQ: /* EQ - ZF=1 */
8933 case UNLT: /* LTU - CF=1 */
8934 case UNLE: /* LEU - CF=1 | ZF=1 */
8935 case LTGT: /* EQ - ZF=0 */
8937 case LT: /* LTU - CF=1 - fails on unordered */
8939 *bypass_code = UNORDERED;
8941 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8943 *bypass_code = UNORDERED;
8945 case EQ: /* EQ - ZF=1 - fails on unordered */
8947 *bypass_code = UNORDERED;
8949 case NE: /* NE - ZF=0 - fails on unordered */
8951 *second_code = UNORDERED;
8953 case UNGE: /* GEU - CF=0 - fails on unordered */
8955 *second_code = UNORDERED;
8957 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8959 *second_code = UNORDERED;
8964 if (!TARGET_IEEE_FP)
8971 /* Return cost of comparison done fcom + arithmetics operations on AX.
8972 All following functions do use number of instructions as a cost metrics.
8973 In future this should be tweaked to compute bytes for optimize_size and
8974 take into account performance of various instructions on various CPUs. */
8976 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8978 if (!TARGET_IEEE_FP)
8980 /* The cost of code output by ix86_expand_fp_compare. */
9008 /* Return cost of comparison done using fcomi operation.
9009 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9011 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9013 enum rtx_code bypass_code, first_code, second_code;
9014 /* Return arbitrarily high cost when instruction is not supported - this
9015 prevents gcc from using it. */
9018 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9019 return (bypass_code != NIL || second_code != NIL) + 2;
9022 /* Return cost of comparison done using sahf operation.
9023 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9025 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9027 enum rtx_code bypass_code, first_code, second_code;
9028 /* Return arbitrarily high cost when instruction is not preferred - this
9029 avoids gcc from using it. */
9030 if (!TARGET_USE_SAHF && !optimize_size)
9032 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9033 return (bypass_code != NIL || second_code != NIL) + 3;
9036 /* Compute cost of the comparison done using any method.
9037 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9039 ix86_fp_comparison_cost (enum rtx_code code)
9041 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9044 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9045 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9047 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9048 if (min > sahf_cost)
9050 if (min > fcomi_cost)
9055 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9058 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9059 rtx *second_test, rtx *bypass_test)
9061 enum machine_mode fpcmp_mode, intcmp_mode;
9063 int cost = ix86_fp_comparison_cost (code);
9064 enum rtx_code bypass_code, first_code, second_code;
9066 fpcmp_mode = ix86_fp_compare_mode (code);
9067 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9070 *second_test = NULL_RTX;
9072 *bypass_test = NULL_RTX;
9074 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9076 /* Do fcomi/sahf based test when profitable. */
9077 if ((bypass_code == NIL || bypass_test)
9078 && (second_code == NIL || second_test)
9079 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9083 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9084 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9090 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9091 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9093 scratch = gen_reg_rtx (HImode);
9094 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9095 emit_insn (gen_x86_sahf_1 (scratch));
9098 /* The FP codes work out to act like unsigned. */
9099 intcmp_mode = fpcmp_mode;
9101 if (bypass_code != NIL)
9102 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9103 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9105 if (second_code != NIL)
9106 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9107 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9112 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9113 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9114 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9116 scratch = gen_reg_rtx (HImode);
9117 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9119 /* In the unordered case, we have to check C2 for NaN's, which
9120 doesn't happen to work out to anything nice combination-wise.
9121 So do some bit twiddling on the value we've got in AH to come
9122 up with an appropriate set of condition codes. */
9124 intcmp_mode = CCNOmode;
9129 if (code == GT || !TARGET_IEEE_FP)
9131 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9136 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9137 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9138 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9139 intcmp_mode = CCmode;
9145 if (code == LT && TARGET_IEEE_FP)
9147 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9148 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9149 intcmp_mode = CCmode;
9154 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9160 if (code == GE || !TARGET_IEEE_FP)
9162 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9167 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9168 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9175 if (code == LE && TARGET_IEEE_FP)
9177 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9178 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9179 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9180 intcmp_mode = CCmode;
9185 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9191 if (code == EQ && TARGET_IEEE_FP)
9193 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9194 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9195 intcmp_mode = CCmode;
9200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9207 if (code == NE && TARGET_IEEE_FP)
9209 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9210 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9216 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9222 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9226 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9235 /* Return the test that should be put into the flags user, i.e.
9236 the bcc, scc, or cmov instruction. */
9237 return gen_rtx_fmt_ee (code, VOIDmode,
9238 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9243 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9246 op0 = ix86_compare_op0;
9247 op1 = ix86_compare_op1;
9250 *second_test = NULL_RTX;
9252 *bypass_test = NULL_RTX;
9254 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9255 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9256 second_test, bypass_test);
9258 ret = ix86_expand_int_compare (code, op0, op1);
9263 /* Return true if the CODE will result in nontrivial jump sequence. */
9265 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9267 enum rtx_code bypass_code, first_code, second_code;
9270 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9271 return bypass_code != NIL || second_code != NIL;
9275 ix86_expand_branch (enum rtx_code code, rtx label)
9279 switch (GET_MODE (ix86_compare_op0))
9285 tmp = ix86_expand_compare (code, NULL, NULL);
9286 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9287 gen_rtx_LABEL_REF (VOIDmode, label),
9289 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9298 enum rtx_code bypass_code, first_code, second_code;
9300 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9303 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9305 /* Check whether we will use the natural sequence with one jump. If
9306 so, we can expand jump early. Otherwise delay expansion by
9307 creating compound insn to not confuse optimizers. */
9308 if (bypass_code == NIL && second_code == NIL
9311 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9312 gen_rtx_LABEL_REF (VOIDmode, label),
9317 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9318 ix86_compare_op0, ix86_compare_op1);
9319 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9320 gen_rtx_LABEL_REF (VOIDmode, label),
9322 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9324 use_fcomi = ix86_use_fcomi_compare (code);
9325 vec = rtvec_alloc (3 + !use_fcomi);
9326 RTVEC_ELT (vec, 0) = tmp;
9328 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9330 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9333 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9335 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9343 /* Expand DImode branch into multiple compare+branch. */
9345 rtx lo[2], hi[2], label2;
9346 enum rtx_code code1, code2, code3;
9348 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9350 tmp = ix86_compare_op0;
9351 ix86_compare_op0 = ix86_compare_op1;
9352 ix86_compare_op1 = tmp;
9353 code = swap_condition (code);
9355 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9356 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9358 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9359 avoid two branches. This costs one extra insn, so disable when
9360 optimizing for size. */
9362 if ((code == EQ || code == NE)
9364 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9369 if (hi[1] != const0_rtx)
9370 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9371 NULL_RTX, 0, OPTAB_WIDEN);
9374 if (lo[1] != const0_rtx)
9375 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9376 NULL_RTX, 0, OPTAB_WIDEN);
9378 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9379 NULL_RTX, 0, OPTAB_WIDEN);
9381 ix86_compare_op0 = tmp;
9382 ix86_compare_op1 = const0_rtx;
9383 ix86_expand_branch (code, label);
9387 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9388 op1 is a constant and the low word is zero, then we can just
9389 examine the high word. */
9391 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9394 case LT: case LTU: case GE: case GEU:
9395 ix86_compare_op0 = hi[0];
9396 ix86_compare_op1 = hi[1];
9397 ix86_expand_branch (code, label);
9403 /* Otherwise, we need two or three jumps. */
9405 label2 = gen_label_rtx ();
9408 code2 = swap_condition (code);
9409 code3 = unsigned_condition (code);
9413 case LT: case GT: case LTU: case GTU:
9416 case LE: code1 = LT; code2 = GT; break;
9417 case GE: code1 = GT; code2 = LT; break;
9418 case LEU: code1 = LTU; code2 = GTU; break;
9419 case GEU: code1 = GTU; code2 = LTU; break;
9421 case EQ: code1 = NIL; code2 = NE; break;
9422 case NE: code2 = NIL; break;
9430 * if (hi(a) < hi(b)) goto true;
9431 * if (hi(a) > hi(b)) goto false;
9432 * if (lo(a) < lo(b)) goto true;
9436 ix86_compare_op0 = hi[0];
9437 ix86_compare_op1 = hi[1];
9440 ix86_expand_branch (code1, label);
9442 ix86_expand_branch (code2, label2);
9444 ix86_compare_op0 = lo[0];
9445 ix86_compare_op1 = lo[1];
9446 ix86_expand_branch (code3, label);
9449 emit_label (label2);
9458 /* Split branch based on floating point condition. */
9460 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9461 rtx target1, rtx target2, rtx tmp)
9464 rtx label = NULL_RTX;
9466 int bypass_probability = -1, second_probability = -1, probability = -1;
9469 if (target2 != pc_rtx)
9472 code = reverse_condition_maybe_unordered (code);
9477 condition = ix86_expand_fp_compare (code, op1, op2,
9478 tmp, &second, &bypass);
9480 if (split_branch_probability >= 0)
9482 /* Distribute the probabilities across the jumps.
9483 Assume the BYPASS and SECOND to be always test
9485 probability = split_branch_probability;
9487 /* Value of 1 is low enough to make no need for probability
9488 to be updated. Later we may run some experiments and see
9489 if unordered values are more frequent in practice. */
9491 bypass_probability = 1;
9493 second_probability = 1;
9495 if (bypass != NULL_RTX)
9497 label = gen_label_rtx ();
9498 i = emit_jump_insn (gen_rtx_SET
9500 gen_rtx_IF_THEN_ELSE (VOIDmode,
9502 gen_rtx_LABEL_REF (VOIDmode,
9505 if (bypass_probability >= 0)
9507 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9508 GEN_INT (bypass_probability),
9511 i = emit_jump_insn (gen_rtx_SET
9513 gen_rtx_IF_THEN_ELSE (VOIDmode,
9514 condition, target1, target2)));
9515 if (probability >= 0)
9517 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9518 GEN_INT (probability),
9520 if (second != NULL_RTX)
9522 i = emit_jump_insn (gen_rtx_SET
9524 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9526 if (second_probability >= 0)
9528 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9529 GEN_INT (second_probability),
9532 if (label != NULL_RTX)
9537 ix86_expand_setcc (enum rtx_code code, rtx dest)
9539 rtx ret, tmp, tmpreg, equiv;
9540 rtx second_test, bypass_test;
9542 if (GET_MODE (ix86_compare_op0) == DImode
9544 return 0; /* FAIL */
9546 if (GET_MODE (dest) != QImode)
9549 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9550 PUT_MODE (ret, QImode);
9555 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9556 if (bypass_test || second_test)
9558 rtx test = second_test;
9560 rtx tmp2 = gen_reg_rtx (QImode);
9567 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9569 PUT_MODE (test, QImode);
9570 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9573 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9575 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9578 /* Attach a REG_EQUAL note describing the comparison result. */
9579 equiv = simplify_gen_relational (code, QImode,
9580 GET_MODE (ix86_compare_op0),
9581 ix86_compare_op0, ix86_compare_op1);
9582 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9584 return 1; /* DONE */
9587 /* Expand comparison setting or clearing carry flag. Return true when
9588 successful and set pop for the operation. */
9590 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9592 enum machine_mode mode =
9593 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9595 /* Do not handle DImode compares that go trought special path. Also we can't
9596 deal with FP compares yet. This is possible to add. */
9597 if ((mode == DImode && !TARGET_64BIT))
9599 if (FLOAT_MODE_P (mode))
9601 rtx second_test = NULL, bypass_test = NULL;
9602 rtx compare_op, compare_seq;
9604 /* Shortcut: following common codes never translate into carry flag compares. */
9605 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9606 || code == ORDERED || code == UNORDERED)
9609 /* These comparisons require zero flag; swap operands so they won't. */
9610 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9616 code = swap_condition (code);
9619 /* Try to expand the comparison and verify that we end up with carry flag
9620 based comparison. This is fails to be true only when we decide to expand
9621 comparison using arithmetic that is not too common scenario. */
9623 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9624 &second_test, &bypass_test);
9625 compare_seq = get_insns ();
9628 if (second_test || bypass_test)
9630 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9631 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9632 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9634 code = GET_CODE (compare_op);
9635 if (code != LTU && code != GEU)
9637 emit_insn (compare_seq);
9641 if (!INTEGRAL_MODE_P (mode))
9649 /* Convert a==0 into (unsigned)a<1. */
9652 if (op1 != const0_rtx)
9655 code = (code == EQ ? LTU : GEU);
9658 /* Convert a>b into b<a or a>=b-1. */
9661 if (GET_CODE (op1) == CONST_INT)
9663 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9664 /* Bail out on overflow. We still can swap operands but that
9665 would force loading of the constant into register. */
9666 if (op1 == const0_rtx
9667 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9669 code = (code == GTU ? GEU : LTU);
9676 code = (code == GTU ? LTU : GEU);
9680 /* Convert a>=0 into (unsigned)a<0x80000000. */
9683 if (mode == DImode || op1 != const0_rtx)
9685 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9686 code = (code == LT ? GEU : LTU);
9690 if (mode == DImode || op1 != constm1_rtx)
9692 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9693 code = (code == LE ? GEU : LTU);
9699 /* Swapping operands may cause constant to appear as first operand. */
9700 if (!nonimmediate_operand (op0, VOIDmode))
9704 op0 = force_reg (mode, op0);
9706 ix86_compare_op0 = op0;
9707 ix86_compare_op1 = op1;
9708 *pop = ix86_expand_compare (code, NULL, NULL);
9709 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9715 ix86_expand_int_movcc (rtx operands[])
9717 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9718 rtx compare_seq, compare_op;
9719 rtx second_test, bypass_test;
9720 enum machine_mode mode = GET_MODE (operands[0]);
9721 bool sign_bit_compare_p = false;;
9724 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9725 compare_seq = get_insns ();
9728 compare_code = GET_CODE (compare_op);
9730 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9731 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9732 sign_bit_compare_p = true;
9734 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9735 HImode insns, we'd be swallowed in word prefix ops. */
9737 if ((mode != HImode || TARGET_FAST_PREFIX)
9738 && (mode != DImode || TARGET_64BIT)
9739 && GET_CODE (operands[2]) == CONST_INT
9740 && GET_CODE (operands[3]) == CONST_INT)
9742 rtx out = operands[0];
9743 HOST_WIDE_INT ct = INTVAL (operands[2]);
9744 HOST_WIDE_INT cf = INTVAL (operands[3]);
9748 /* Sign bit compares are better done using shifts than we do by using
9750 if (sign_bit_compare_p
9751 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9752 ix86_compare_op1, &compare_op))
9754 /* Detect overlap between destination and compare sources. */
9757 if (!sign_bit_compare_p)
9761 compare_code = GET_CODE (compare_op);
9763 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9764 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9767 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9770 /* To simplify rest of code, restrict to the GEU case. */
9771 if (compare_code == LTU)
9773 HOST_WIDE_INT tmp = ct;
9776 compare_code = reverse_condition (compare_code);
9777 code = reverse_condition (code);
9782 PUT_CODE (compare_op,
9783 reverse_condition_maybe_unordered
9784 (GET_CODE (compare_op)));
9786 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9790 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9791 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9792 tmp = gen_reg_rtx (mode);
9795 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9797 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9801 if (code == GT || code == GE)
9802 code = reverse_condition (code);
9805 HOST_WIDE_INT tmp = ct;
9810 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9811 ix86_compare_op1, VOIDmode, 0, -1);
9824 tmp = expand_simple_binop (mode, PLUS,
9826 copy_rtx (tmp), 1, OPTAB_DIRECT);
9837 tmp = expand_simple_binop (mode, IOR,
9839 copy_rtx (tmp), 1, OPTAB_DIRECT);
9841 else if (diff == -1 && ct)
9851 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9853 tmp = expand_simple_binop (mode, PLUS,
9854 copy_rtx (tmp), GEN_INT (cf),
9855 copy_rtx (tmp), 1, OPTAB_DIRECT);
9863 * andl cf - ct, dest
9873 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9876 tmp = expand_simple_binop (mode, AND,
9878 gen_int_mode (cf - ct, mode),
9879 copy_rtx (tmp), 1, OPTAB_DIRECT);
9881 tmp = expand_simple_binop (mode, PLUS,
9882 copy_rtx (tmp), GEN_INT (ct),
9883 copy_rtx (tmp), 1, OPTAB_DIRECT);
9886 if (!rtx_equal_p (tmp, out))
9887 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9889 return 1; /* DONE */
9895 tmp = ct, ct = cf, cf = tmp;
9897 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9899 /* We may be reversing unordered compare to normal compare, that
9900 is not valid in general (we may convert non-trapping condition
9901 to trapping one), however on i386 we currently emit all
9902 comparisons unordered. */
9903 compare_code = reverse_condition_maybe_unordered (compare_code);
9904 code = reverse_condition_maybe_unordered (code);
9908 compare_code = reverse_condition (compare_code);
9909 code = reverse_condition (code);
9914 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9915 && GET_CODE (ix86_compare_op1) == CONST_INT)
9917 if (ix86_compare_op1 == const0_rtx
9918 && (code == LT || code == GE))
9919 compare_code = code;
9920 else if (ix86_compare_op1 == constm1_rtx)
9924 else if (code == GT)
9929 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9930 if (compare_code != NIL
9931 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9932 && (cf == -1 || ct == -1))
9934 /* If lea code below could be used, only optimize
9935 if it results in a 2 insn sequence. */
9937 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9938 || diff == 3 || diff == 5 || diff == 9)
9939 || (compare_code == LT && ct == -1)
9940 || (compare_code == GE && cf == -1))
9943 * notl op1 (if necessary)
9951 code = reverse_condition (code);
9954 out = emit_store_flag (out, code, ix86_compare_op0,
9955 ix86_compare_op1, VOIDmode, 0, -1);
9957 out = expand_simple_binop (mode, IOR,
9959 out, 1, OPTAB_DIRECT);
9960 if (out != operands[0])
9961 emit_move_insn (operands[0], out);
9963 return 1; /* DONE */
9968 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9969 || diff == 3 || diff == 5 || diff == 9)
9970 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9971 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9977 * lea cf(dest*(ct-cf)),dest
9981 * This also catches the degenerate setcc-only case.
9987 out = emit_store_flag (out, code, ix86_compare_op0,
9988 ix86_compare_op1, VOIDmode, 0, 1);
9991 /* On x86_64 the lea instruction operates on Pmode, so we need
9992 to get arithmetics done in proper mode to match. */
9994 tmp = copy_rtx (out);
9998 out1 = copy_rtx (out);
9999 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10003 tmp = gen_rtx_PLUS (mode, tmp, out1);
10009 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10012 if (!rtx_equal_p (tmp, out))
10015 out = force_operand (tmp, copy_rtx (out));
10017 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10019 if (!rtx_equal_p (out, operands[0]))
10020 emit_move_insn (operands[0], copy_rtx (out));
10022 return 1; /* DONE */
10026 * General case: Jumpful:
10027 * xorl dest,dest cmpl op1, op2
10028 * cmpl op1, op2 movl ct, dest
10029 * setcc dest jcc 1f
10030 * decl dest movl cf, dest
10031 * andl (cf-ct),dest 1:
10034 * Size 20. Size 14.
10036 * This is reasonably steep, but branch mispredict costs are
10037 * high on modern cpus, so consider failing only if optimizing
10041 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10042 && BRANCH_COST >= 2)
10048 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10049 /* We may be reversing unordered compare to normal compare,
10050 that is not valid in general (we may convert non-trapping
10051 condition to trapping one), however on i386 we currently
10052 emit all comparisons unordered. */
10053 code = reverse_condition_maybe_unordered (code);
10056 code = reverse_condition (code);
10057 if (compare_code != NIL)
10058 compare_code = reverse_condition (compare_code);
10062 if (compare_code != NIL)
10064 /* notl op1 (if needed)
10069 For x < 0 (resp. x <= -1) there will be no notl,
10070 so if possible swap the constants to get rid of the
10072 True/false will be -1/0 while code below (store flag
10073 followed by decrement) is 0/-1, so the constants need
10074 to be exchanged once more. */
10076 if (compare_code == GE || !cf)
10078 code = reverse_condition (code);
10083 HOST_WIDE_INT tmp = cf;
10088 out = emit_store_flag (out, code, ix86_compare_op0,
10089 ix86_compare_op1, VOIDmode, 0, -1);
10093 out = emit_store_flag (out, code, ix86_compare_op0,
10094 ix86_compare_op1, VOIDmode, 0, 1);
10096 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10097 copy_rtx (out), 1, OPTAB_DIRECT);
10100 out = expand_simple_binop (mode, AND, copy_rtx (out),
10101 gen_int_mode (cf - ct, mode),
10102 copy_rtx (out), 1, OPTAB_DIRECT);
10104 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10105 copy_rtx (out), 1, OPTAB_DIRECT);
10106 if (!rtx_equal_p (out, operands[0]))
10107 emit_move_insn (operands[0], copy_rtx (out));
10109 return 1; /* DONE */
10113 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10115 /* Try a few things more with specific constants and a variable. */
10118 rtx var, orig_out, out, tmp;
10120 if (BRANCH_COST <= 2)
10121 return 0; /* FAIL */
10123 /* If one of the two operands is an interesting constant, load a
10124 constant with the above and mask it in with a logical operation. */
10126 if (GET_CODE (operands[2]) == CONST_INT)
10129 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10130 operands[3] = constm1_rtx, op = and_optab;
10131 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10132 operands[3] = const0_rtx, op = ior_optab;
10134 return 0; /* FAIL */
10136 else if (GET_CODE (operands[3]) == CONST_INT)
10139 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10140 operands[2] = constm1_rtx, op = and_optab;
10141 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10142 operands[2] = const0_rtx, op = ior_optab;
10144 return 0; /* FAIL */
10147 return 0; /* FAIL */
10149 orig_out = operands[0];
10150 tmp = gen_reg_rtx (mode);
10153 /* Recurse to get the constant loaded. */
10154 if (ix86_expand_int_movcc (operands) == 0)
10155 return 0; /* FAIL */
10157 /* Mask in the interesting variable. */
10158 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10160 if (!rtx_equal_p (out, orig_out))
10161 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10163 return 1; /* DONE */
10167 * For comparison with above,
10177 if (! nonimmediate_operand (operands[2], mode))
10178 operands[2] = force_reg (mode, operands[2]);
10179 if (! nonimmediate_operand (operands[3], mode))
10180 operands[3] = force_reg (mode, operands[3]);
10182 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10184 rtx tmp = gen_reg_rtx (mode);
10185 emit_move_insn (tmp, operands[3]);
10188 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10190 rtx tmp = gen_reg_rtx (mode);
10191 emit_move_insn (tmp, operands[2]);
10195 if (! register_operand (operands[2], VOIDmode)
10197 || ! register_operand (operands[3], VOIDmode)))
10198 operands[2] = force_reg (mode, operands[2]);
10201 && ! register_operand (operands[3], VOIDmode))
10202 operands[3] = force_reg (mode, operands[3]);
10204 emit_insn (compare_seq);
10205 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10206 gen_rtx_IF_THEN_ELSE (mode,
10207 compare_op, operands[2],
10210 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10211 gen_rtx_IF_THEN_ELSE (mode,
10213 copy_rtx (operands[3]),
10214 copy_rtx (operands[0]))));
10216 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10217 gen_rtx_IF_THEN_ELSE (mode,
10219 copy_rtx (operands[2]),
10220 copy_rtx (operands[0]))));
10222 return 1; /* DONE */
10226 ix86_expand_fp_movcc (rtx operands[])
10228 enum rtx_code code;
10230 rtx compare_op, second_test, bypass_test;
10232 /* For SF/DFmode conditional moves based on comparisons
10233 in same mode, we may want to use SSE min/max instructions. */
10234 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10235 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10236 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10237 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10238 && (!TARGET_IEEE_FP
10239 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10240 /* We may be called from the post-reload splitter. */
10241 && (!REG_P (operands[0])
10242 || SSE_REG_P (operands[0])
10243 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10245 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10246 code = GET_CODE (operands[1]);
10248 /* See if we have (cross) match between comparison operands and
10249 conditional move operands. */
10250 if (rtx_equal_p (operands[2], op1))
10255 code = reverse_condition_maybe_unordered (code);
10257 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10259 /* Check for min operation. */
10260 if (code == LT || code == UNLE)
10268 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10269 if (memory_operand (op0, VOIDmode))
10270 op0 = force_reg (GET_MODE (operands[0]), op0);
10271 if (GET_MODE (operands[0]) == SFmode)
10272 emit_insn (gen_minsf3 (operands[0], op0, op1));
10274 emit_insn (gen_mindf3 (operands[0], op0, op1));
10277 /* Check for max operation. */
10278 if (code == GT || code == UNGE)
10286 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10287 if (memory_operand (op0, VOIDmode))
10288 op0 = force_reg (GET_MODE (operands[0]), op0);
10289 if (GET_MODE (operands[0]) == SFmode)
10290 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10292 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10296 /* Manage condition to be sse_comparison_operator. In case we are
10297 in non-ieee mode, try to canonicalize the destination operand
10298 to be first in the comparison - this helps reload to avoid extra
10300 if (!sse_comparison_operator (operands[1], VOIDmode)
10301 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10303 rtx tmp = ix86_compare_op0;
10304 ix86_compare_op0 = ix86_compare_op1;
10305 ix86_compare_op1 = tmp;
10306 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10307 VOIDmode, ix86_compare_op0,
10310 /* Similarly try to manage result to be first operand of conditional
10311 move. We also don't support the NE comparison on SSE, so try to
10313 if ((rtx_equal_p (operands[0], operands[3])
10314 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10315 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10317 rtx tmp = operands[2];
10318 operands[2] = operands[3];
10320 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10321 (GET_CODE (operands[1])),
10322 VOIDmode, ix86_compare_op0,
10325 if (GET_MODE (operands[0]) == SFmode)
10326 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10327 operands[2], operands[3],
10328 ix86_compare_op0, ix86_compare_op1));
10330 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10331 operands[2], operands[3],
10332 ix86_compare_op0, ix86_compare_op1));
10336 /* The floating point conditional move instructions don't directly
10337 support conditions resulting from a signed integer comparison. */
10339 code = GET_CODE (operands[1]);
10340 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10342 /* The floating point conditional move instructions don't directly
10343 support signed integer comparisons. */
10345 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10347 if (second_test != NULL || bypass_test != NULL)
10349 tmp = gen_reg_rtx (QImode);
10350 ix86_expand_setcc (code, tmp);
10352 ix86_compare_op0 = tmp;
10353 ix86_compare_op1 = const0_rtx;
10354 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10356 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10358 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10359 emit_move_insn (tmp, operands[3]);
10362 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10364 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10365 emit_move_insn (tmp, operands[2]);
10369 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10370 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10375 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10376 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10381 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10382 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10390 /* Expand conditional increment or decrement using adb/sbb instructions.
10391 The default case using setcc followed by the conditional move can be
10392 done by generic code. */
10394 ix86_expand_int_addcc (rtx operands[])
10396 enum rtx_code code = GET_CODE (operands[1]);
10398 rtx val = const0_rtx;
10399 bool fpcmp = false;
10400 enum machine_mode mode = GET_MODE (operands[0]);
10402 if (operands[3] != const1_rtx
10403 && operands[3] != constm1_rtx)
10405 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10406 ix86_compare_op1, &compare_op))
10408 code = GET_CODE (compare_op);
10410 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10411 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10414 code = ix86_fp_compare_code_to_integer (code);
10421 PUT_CODE (compare_op,
10422 reverse_condition_maybe_unordered
10423 (GET_CODE (compare_op)));
10425 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10427 PUT_MODE (compare_op, mode);
10429 /* Construct either adc or sbb insn. */
10430 if ((code == LTU) == (operands[3] == constm1_rtx))
10432 switch (GET_MODE (operands[0]))
10435 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10438 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10441 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10444 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10452 switch (GET_MODE (operands[0]))
10455 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10458 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10461 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10464 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10470 return 1; /* DONE */
10474 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10475 works for floating pointer parameters and nonoffsetable memories.
10476 For pushes, it returns just stack offsets; the values will be saved
10477 in the right order. Maximally three parts are generated. */
10480 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10485 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10487 size = (GET_MODE_SIZE (mode) + 4) / 8;
10489 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10491 if (size < 2 || size > 3)
10494 /* Optimize constant pool reference to immediates. This is used by fp
10495 moves, that force all constants to memory to allow combining. */
10496 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10498 rtx tmp = maybe_get_pool_constant (operand);
10503 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10505 /* The only non-offsetable memories we handle are pushes. */
10506 if (! push_operand (operand, VOIDmode))
10509 operand = copy_rtx (operand);
10510 PUT_MODE (operand, Pmode);
10511 parts[0] = parts[1] = parts[2] = operand;
10513 else if (!TARGET_64BIT)
10515 if (mode == DImode)
10516 split_di (&operand, 1, &parts[0], &parts[1]);
10519 if (REG_P (operand))
10521 if (!reload_completed)
10523 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10524 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10526 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10528 else if (offsettable_memref_p (operand))
10530 operand = adjust_address (operand, SImode, 0);
10531 parts[0] = operand;
10532 parts[1] = adjust_address (operand, SImode, 4);
10534 parts[2] = adjust_address (operand, SImode, 8);
10536 else if (GET_CODE (operand) == CONST_DOUBLE)
10541 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10545 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10546 parts[2] = gen_int_mode (l[2], SImode);
10549 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10554 parts[1] = gen_int_mode (l[1], SImode);
10555 parts[0] = gen_int_mode (l[0], SImode);
10563 if (mode == TImode)
10564 split_ti (&operand, 1, &parts[0], &parts[1]);
10565 if (mode == XFmode || mode == TFmode)
10567 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10568 if (REG_P (operand))
10570 if (!reload_completed)
10572 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10573 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10575 else if (offsettable_memref_p (operand))
10577 operand = adjust_address (operand, DImode, 0);
10578 parts[0] = operand;
10579 parts[1] = adjust_address (operand, upper_mode, 8);
10581 else if (GET_CODE (operand) == CONST_DOUBLE)
10586 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10587 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10588 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10589 if (HOST_BITS_PER_WIDE_INT >= 64)
10592 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10593 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10596 parts[0] = immed_double_const (l[0], l[1], DImode);
10597 if (upper_mode == SImode)
10598 parts[1] = gen_int_mode (l[2], SImode);
10599 else if (HOST_BITS_PER_WIDE_INT >= 64)
10602 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10603 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10606 parts[1] = immed_double_const (l[2], l[3], DImode);
10616 /* Emit insns to perform a move or push of DI, DF, and XF values.
10617 Return false when normal moves are needed; true when all required
10618 insns have been emitted. Operands 2-4 contain the input values
10619 int the correct order; operands 5-7 contain the output values. */
10622 ix86_split_long_move (rtx operands[])
10627 int collisions = 0;
10628 enum machine_mode mode = GET_MODE (operands[0]);
10630 /* The DFmode expanders may ask us to move double.
10631 For 64bit target this is single move. By hiding the fact
10632 here we simplify i386.md splitters. */
10633 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10635 /* Optimize constant pool reference to immediates. This is used by
10636 fp moves, that force all constants to memory to allow combining. */
10638 if (GET_CODE (operands[1]) == MEM
10639 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10640 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10641 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10642 if (push_operand (operands[0], VOIDmode))
10644 operands[0] = copy_rtx (operands[0]);
10645 PUT_MODE (operands[0], Pmode);
10648 operands[0] = gen_lowpart (DImode, operands[0]);
10649 operands[1] = gen_lowpart (DImode, operands[1]);
10650 emit_move_insn (operands[0], operands[1]);
10654 /* The only non-offsettable memory we handle is push. */
10655 if (push_operand (operands[0], VOIDmode))
10657 else if (GET_CODE (operands[0]) == MEM
10658 && ! offsettable_memref_p (operands[0]))
10661 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10662 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10664 /* When emitting push, take care for source operands on the stack. */
10665 if (push && GET_CODE (operands[1]) == MEM
10666 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10669 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10670 XEXP (part[1][2], 0));
10671 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10672 XEXP (part[1][1], 0));
10675 /* We need to do copy in the right order in case an address register
10676 of the source overlaps the destination. */
10677 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10679 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10681 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10684 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10687 /* Collision in the middle part can be handled by reordering. */
10688 if (collisions == 1 && nparts == 3
10689 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10692 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10693 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10696 /* If there are more collisions, we can't handle it by reordering.
10697 Do an lea to the last part and use only one colliding move. */
10698 else if (collisions > 1)
10704 base = part[0][nparts - 1];
10706 /* Handle the case when the last part isn't valid for lea.
10707 Happens in 64-bit mode storing the 12-byte XFmode. */
10708 if (GET_MODE (base) != Pmode)
10709 base = gen_rtx_REG (Pmode, REGNO (base));
10711 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10712 part[1][0] = replace_equiv_address (part[1][0], base);
10713 part[1][1] = replace_equiv_address (part[1][1],
10714 plus_constant (base, UNITS_PER_WORD));
10716 part[1][2] = replace_equiv_address (part[1][2],
10717 plus_constant (base, 8));
10727 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10728 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10729 emit_move_insn (part[0][2], part[1][2]);
10734 /* In 64bit mode we don't have 32bit push available. In case this is
10735 register, it is OK - we will just use larger counterpart. We also
10736 retype memory - these comes from attempt to avoid REX prefix on
10737 moving of second half of TFmode value. */
10738 if (GET_MODE (part[1][1]) == SImode)
10740 if (GET_CODE (part[1][1]) == MEM)
10741 part[1][1] = adjust_address (part[1][1], DImode, 0);
10742 else if (REG_P (part[1][1]))
10743 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10746 if (GET_MODE (part[1][0]) == SImode)
10747 part[1][0] = part[1][1];
10750 emit_move_insn (part[0][1], part[1][1]);
10751 emit_move_insn (part[0][0], part[1][0]);
10755 /* Choose correct order to not overwrite the source before it is copied. */
10756 if ((REG_P (part[0][0])
10757 && REG_P (part[1][1])
10758 && (REGNO (part[0][0]) == REGNO (part[1][1])
10760 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10762 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10766 operands[2] = part[0][2];
10767 operands[3] = part[0][1];
10768 operands[4] = part[0][0];
10769 operands[5] = part[1][2];
10770 operands[6] = part[1][1];
10771 operands[7] = part[1][0];
10775 operands[2] = part[0][1];
10776 operands[3] = part[0][0];
10777 operands[5] = part[1][1];
10778 operands[6] = part[1][0];
10785 operands[2] = part[0][0];
10786 operands[3] = part[0][1];
10787 operands[4] = part[0][2];
10788 operands[5] = part[1][0];
10789 operands[6] = part[1][1];
10790 operands[7] = part[1][2];
10794 operands[2] = part[0][0];
10795 operands[3] = part[0][1];
10796 operands[5] = part[1][0];
10797 operands[6] = part[1][1];
10800 emit_move_insn (operands[2], operands[5]);
10801 emit_move_insn (operands[3], operands[6]);
10803 emit_move_insn (operands[4], operands[7]);
10809 ix86_split_ashldi (rtx *operands, rtx scratch)
10811 rtx low[2], high[2];
10814 if (GET_CODE (operands[2]) == CONST_INT)
10816 split_di (operands, 2, low, high);
10817 count = INTVAL (operands[2]) & 63;
10821 emit_move_insn (high[0], low[1]);
10822 emit_move_insn (low[0], const0_rtx);
10825 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10829 if (!rtx_equal_p (operands[0], operands[1]))
10830 emit_move_insn (operands[0], operands[1]);
10831 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10832 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10837 if (!rtx_equal_p (operands[0], operands[1]))
10838 emit_move_insn (operands[0], operands[1]);
10840 split_di (operands, 1, low, high);
10842 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10843 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10845 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10847 if (! no_new_pseudos)
10848 scratch = force_reg (SImode, const0_rtx);
10850 emit_move_insn (scratch, const0_rtx);
10852 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10856 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10861 ix86_split_ashrdi (rtx *operands, rtx scratch)
10863 rtx low[2], high[2];
10866 if (GET_CODE (operands[2]) == CONST_INT)
10868 split_di (operands, 2, low, high);
10869 count = INTVAL (operands[2]) & 63;
10873 emit_move_insn (low[0], high[1]);
10875 if (! reload_completed)
10876 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10879 emit_move_insn (high[0], low[0]);
10880 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10884 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10888 if (!rtx_equal_p (operands[0], operands[1]))
10889 emit_move_insn (operands[0], operands[1]);
10890 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10891 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10896 if (!rtx_equal_p (operands[0], operands[1]))
10897 emit_move_insn (operands[0], operands[1]);
10899 split_di (operands, 1, low, high);
10901 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10902 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10904 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10906 if (! no_new_pseudos)
10907 scratch = gen_reg_rtx (SImode);
10908 emit_move_insn (scratch, high[0]);
10909 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10910 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10914 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10919 ix86_split_lshrdi (rtx *operands, rtx scratch)
10921 rtx low[2], high[2];
10924 if (GET_CODE (operands[2]) == CONST_INT)
10926 split_di (operands, 2, low, high);
10927 count = INTVAL (operands[2]) & 63;
10931 emit_move_insn (low[0], high[1]);
10932 emit_move_insn (high[0], const0_rtx);
10935 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10939 if (!rtx_equal_p (operands[0], operands[1]))
10940 emit_move_insn (operands[0], operands[1]);
10941 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10942 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10947 if (!rtx_equal_p (operands[0], operands[1]))
10948 emit_move_insn (operands[0], operands[1]);
10950 split_di (operands, 1, low, high);
10952 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10953 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10955 /* Heh. By reversing the arguments, we can reuse this pattern. */
10956 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10958 if (! no_new_pseudos)
10959 scratch = force_reg (SImode, const0_rtx);
10961 emit_move_insn (scratch, const0_rtx);
10963 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10967 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10971 /* Helper function for the string operations below. Dest VARIABLE whether
10972 it is aligned to VALUE bytes. If true, jump to the label. */
10974 ix86_expand_aligntest (rtx variable, int value)
10976 rtx label = gen_label_rtx ();
10977 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10978 if (GET_MODE (variable) == DImode)
10979 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10981 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10982 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10987 /* Adjust COUNTER by the VALUE. */
10989 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10991 if (GET_MODE (countreg) == DImode)
10992 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10994 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10997 /* Zero extend possibly SImode EXP to Pmode register. */
10999 ix86_zero_extend_to_Pmode (rtx exp)
11002 if (GET_MODE (exp) == VOIDmode)
11003 return force_reg (Pmode, exp);
11004 if (GET_MODE (exp) == Pmode)
11005 return copy_to_mode_reg (Pmode, exp);
11006 r = gen_reg_rtx (Pmode);
11007 emit_insn (gen_zero_extendsidi2 (r, exp));
11011 /* Expand string move (memcpy) operation. Use i386 string operations when
11012 profitable. expand_clrstr contains similar code. */
11014 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11016 rtx srcreg, destreg, countreg, srcexp, destexp;
11017 enum machine_mode counter_mode;
11018 HOST_WIDE_INT align = 0;
11019 unsigned HOST_WIDE_INT count = 0;
11021 if (GET_CODE (align_exp) == CONST_INT)
11022 align = INTVAL (align_exp);
11024 /* Can't use any of this if the user has appropriated esi or edi. */
11025 if (global_regs[4] || global_regs[5])
11028 /* This simple hack avoids all inlining code and simplifies code below. */
11029 if (!TARGET_ALIGN_STRINGOPS)
11032 if (GET_CODE (count_exp) == CONST_INT)
11034 count = INTVAL (count_exp);
11035 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11039 /* Figure out proper mode for counter. For 32bits it is always SImode,
11040 for 64bits use SImode when possible, otherwise DImode.
11041 Set count to number of bytes copied when known at compile time. */
11042 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11043 || x86_64_zero_extended_value (count_exp))
11044 counter_mode = SImode;
11046 counter_mode = DImode;
11048 if (counter_mode != SImode && counter_mode != DImode)
11051 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11052 if (destreg != XEXP (dst, 0))
11053 dst = replace_equiv_address_nv (dst, destreg);
11054 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11055 if (srcreg != XEXP (src, 0))
11056 src = replace_equiv_address_nv (src, srcreg);
11058 /* When optimizing for size emit simple rep ; movsb instruction for
11059 counts not divisible by 4. */
11061 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11063 emit_insn (gen_cld ());
11064 countreg = ix86_zero_extend_to_Pmode (count_exp);
11065 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11066 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11067 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11071 /* For constant aligned (or small unaligned) copies use rep movsl
11072 followed by code copying the rest. For PentiumPro ensure 8 byte
11073 alignment to allow rep movsl acceleration. */
11075 else if (count != 0
11077 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11078 || optimize_size || count < (unsigned int) 64))
11080 unsigned HOST_WIDE_INT offset = 0;
11081 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11082 rtx srcmem, dstmem;
11084 emit_insn (gen_cld ());
11085 if (count & ~(size - 1))
11087 countreg = copy_to_mode_reg (counter_mode,
11088 GEN_INT ((count >> (size == 4 ? 2 : 3))
11089 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11090 countreg = ix86_zero_extend_to_Pmode (countreg);
11092 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11093 GEN_INT (size == 4 ? 2 : 3));
11094 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11095 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11097 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11098 countreg, destexp, srcexp));
11099 offset = count & ~(size - 1);
11101 if (size == 8 && (count & 0x04))
11103 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11105 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11107 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11112 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11114 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11116 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11121 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11123 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11125 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11128 /* The generic code based on the glibc implementation:
11129 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11130 allowing accelerated copying there)
11131 - copy the data using rep movsl
11132 - copy the rest. */
11137 rtx srcmem, dstmem;
11138 int desired_alignment = (TARGET_PENTIUMPRO
11139 && (count == 0 || count >= (unsigned int) 260)
11140 ? 8 : UNITS_PER_WORD);
11141 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11142 dst = change_address (dst, BLKmode, destreg);
11143 src = change_address (src, BLKmode, srcreg);
11145 /* In case we don't know anything about the alignment, default to
11146 library version, since it is usually equally fast and result in
11149 Also emit call when we know that the count is large and call overhead
11150 will not be important. */
11151 if (!TARGET_INLINE_ALL_STRINGOPS
11152 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11155 if (TARGET_SINGLE_STRINGOP)
11156 emit_insn (gen_cld ());
11158 countreg2 = gen_reg_rtx (Pmode);
11159 countreg = copy_to_mode_reg (counter_mode, count_exp);
11161 /* We don't use loops to align destination and to copy parts smaller
11162 than 4 bytes, because gcc is able to optimize such code better (in
11163 the case the destination or the count really is aligned, gcc is often
11164 able to predict the branches) and also it is friendlier to the
11165 hardware branch prediction.
11167 Using loops is beneficial for generic case, because we can
11168 handle small counts using the loops. Many CPUs (such as Athlon)
11169 have large REP prefix setup costs.
11171 This is quite costly. Maybe we can revisit this decision later or
11172 add some customizability to this code. */
11174 if (count == 0 && align < desired_alignment)
11176 label = gen_label_rtx ();
11177 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11178 LEU, 0, counter_mode, 1, label);
11182 rtx label = ix86_expand_aligntest (destreg, 1);
11183 srcmem = change_address (src, QImode, srcreg);
11184 dstmem = change_address (dst, QImode, destreg);
11185 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11186 ix86_adjust_counter (countreg, 1);
11187 emit_label (label);
11188 LABEL_NUSES (label) = 1;
11192 rtx label = ix86_expand_aligntest (destreg, 2);
11193 srcmem = change_address (src, HImode, srcreg);
11194 dstmem = change_address (dst, HImode, destreg);
11195 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11196 ix86_adjust_counter (countreg, 2);
11197 emit_label (label);
11198 LABEL_NUSES (label) = 1;
11200 if (align <= 4 && desired_alignment > 4)
11202 rtx label = ix86_expand_aligntest (destreg, 4);
11203 srcmem = change_address (src, SImode, srcreg);
11204 dstmem = change_address (dst, SImode, destreg);
11205 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11206 ix86_adjust_counter (countreg, 4);
11207 emit_label (label);
11208 LABEL_NUSES (label) = 1;
11211 if (label && desired_alignment > 4 && !TARGET_64BIT)
11213 emit_label (label);
11214 LABEL_NUSES (label) = 1;
11217 if (!TARGET_SINGLE_STRINGOP)
11218 emit_insn (gen_cld ());
11221 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11223 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11227 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11228 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11230 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11231 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11232 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11233 countreg2, destexp, srcexp));
11237 emit_label (label);
11238 LABEL_NUSES (label) = 1;
11240 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11242 srcmem = change_address (src, SImode, srcreg);
11243 dstmem = change_address (dst, SImode, destreg);
11244 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11246 if ((align <= 4 || count == 0) && TARGET_64BIT)
11248 rtx label = ix86_expand_aligntest (countreg, 4);
11249 srcmem = change_address (src, SImode, srcreg);
11250 dstmem = change_address (dst, SImode, destreg);
11251 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11252 emit_label (label);
11253 LABEL_NUSES (label) = 1;
11255 if (align > 2 && count != 0 && (count & 2))
11257 srcmem = change_address (src, HImode, srcreg);
11258 dstmem = change_address (dst, HImode, destreg);
11259 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11261 if (align <= 2 || count == 0)
11263 rtx label = ix86_expand_aligntest (countreg, 2);
11264 srcmem = change_address (src, HImode, srcreg);
11265 dstmem = change_address (dst, HImode, destreg);
11266 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11267 emit_label (label);
11268 LABEL_NUSES (label) = 1;
11270 if (align > 1 && count != 0 && (count & 1))
11272 srcmem = change_address (src, QImode, srcreg);
11273 dstmem = change_address (dst, QImode, destreg);
11274 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11276 if (align <= 1 || count == 0)
11278 rtx label = ix86_expand_aligntest (countreg, 1);
11279 srcmem = change_address (src, QImode, srcreg);
11280 dstmem = change_address (dst, QImode, destreg);
11281 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11282 emit_label (label);
11283 LABEL_NUSES (label) = 1;
11290 /* Expand string clear operation (bzero). Use i386 string operations when
11291 profitable. expand_movstr contains similar code. */
11293 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11295 rtx destreg, zeroreg, countreg, destexp;
11296 enum machine_mode counter_mode;
11297 HOST_WIDE_INT align = 0;
11298 unsigned HOST_WIDE_INT count = 0;
11300 if (GET_CODE (align_exp) == CONST_INT)
11301 align = INTVAL (align_exp);
11303 /* Can't use any of this if the user has appropriated esi. */
11304 if (global_regs[4])
11307 /* This simple hack avoids all inlining code and simplifies code below. */
11308 if (!TARGET_ALIGN_STRINGOPS)
11311 if (GET_CODE (count_exp) == CONST_INT)
11313 count = INTVAL (count_exp);
11314 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11317 /* Figure out proper mode for counter. For 32bits it is always SImode,
11318 for 64bits use SImode when possible, otherwise DImode.
11319 Set count to number of bytes copied when known at compile time. */
11320 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11321 || x86_64_zero_extended_value (count_exp))
11322 counter_mode = SImode;
11324 counter_mode = DImode;
11326 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11327 if (destreg != XEXP (dst, 0))
11328 dst = replace_equiv_address_nv (dst, destreg);
11330 emit_insn (gen_cld ());
11332 /* When optimizing for size emit simple rep ; movsb instruction for
11333 counts not divisible by 4. */
11335 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11337 countreg = ix86_zero_extend_to_Pmode (count_exp);
11338 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11339 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11340 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11342 else if (count != 0
11344 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11345 || optimize_size || count < (unsigned int) 64))
11347 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11348 unsigned HOST_WIDE_INT offset = 0;
11350 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11351 if (count & ~(size - 1))
11353 countreg = copy_to_mode_reg (counter_mode,
11354 GEN_INT ((count >> (size == 4 ? 2 : 3))
11355 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11356 countreg = ix86_zero_extend_to_Pmode (countreg);
11357 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11358 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11359 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11360 offset = count & ~(size - 1);
11362 if (size == 8 && (count & 0x04))
11364 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11366 emit_insn (gen_strset (destreg, mem,
11367 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11372 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11374 emit_insn (gen_strset (destreg, mem,
11375 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11380 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11382 emit_insn (gen_strset (destreg, mem,
11383 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11390 /* Compute desired alignment of the string operation. */
11391 int desired_alignment = (TARGET_PENTIUMPRO
11392 && (count == 0 || count >= (unsigned int) 260)
11393 ? 8 : UNITS_PER_WORD);
11395 /* In case we don't know anything about the alignment, default to
11396 library version, since it is usually equally fast and result in
11399 Also emit call when we know that the count is large and call overhead
11400 will not be important. */
11401 if (!TARGET_INLINE_ALL_STRINGOPS
11402 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11405 if (TARGET_SINGLE_STRINGOP)
11406 emit_insn (gen_cld ());
11408 countreg2 = gen_reg_rtx (Pmode);
11409 countreg = copy_to_mode_reg (counter_mode, count_exp);
11410 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11411 /* Get rid of MEM_OFFSET, it won't be accurate. */
11412 dst = change_address (dst, BLKmode, destreg);
11414 if (count == 0 && align < desired_alignment)
11416 label = gen_label_rtx ();
11417 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11418 LEU, 0, counter_mode, 1, label);
11422 rtx label = ix86_expand_aligntest (destreg, 1);
11423 emit_insn (gen_strset (destreg, dst,
11424 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11425 ix86_adjust_counter (countreg, 1);
11426 emit_label (label);
11427 LABEL_NUSES (label) = 1;
11431 rtx label = ix86_expand_aligntest (destreg, 2);
11432 emit_insn (gen_strset (destreg, dst,
11433 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11434 ix86_adjust_counter (countreg, 2);
11435 emit_label (label);
11436 LABEL_NUSES (label) = 1;
11438 if (align <= 4 && desired_alignment > 4)
11440 rtx label = ix86_expand_aligntest (destreg, 4);
11441 emit_insn (gen_strset (destreg, dst,
11443 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11445 ix86_adjust_counter (countreg, 4);
11446 emit_label (label);
11447 LABEL_NUSES (label) = 1;
11450 if (label && desired_alignment > 4 && !TARGET_64BIT)
11452 emit_label (label);
11453 LABEL_NUSES (label) = 1;
11457 if (!TARGET_SINGLE_STRINGOP)
11458 emit_insn (gen_cld ());
11461 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11463 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11467 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11468 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11470 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11471 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11475 emit_label (label);
11476 LABEL_NUSES (label) = 1;
11479 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11480 emit_insn (gen_strset (destreg, dst,
11481 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11482 if (TARGET_64BIT && (align <= 4 || count == 0))
11484 rtx label = ix86_expand_aligntest (countreg, 4);
11485 emit_insn (gen_strset (destreg, dst,
11486 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11487 emit_label (label);
11488 LABEL_NUSES (label) = 1;
11490 if (align > 2 && count != 0 && (count & 2))
11491 emit_insn (gen_strset (destreg, dst,
11492 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11493 if (align <= 2 || count == 0)
11495 rtx label = ix86_expand_aligntest (countreg, 2);
11496 emit_insn (gen_strset (destreg, dst,
11497 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11498 emit_label (label);
11499 LABEL_NUSES (label) = 1;
11501 if (align > 1 && count != 0 && (count & 1))
11502 emit_insn (gen_strset (destreg, dst,
11503 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11504 if (align <= 1 || count == 0)
11506 rtx label = ix86_expand_aligntest (countreg, 1);
11507 emit_insn (gen_strset (destreg, dst,
11508 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11509 emit_label (label);
11510 LABEL_NUSES (label) = 1;
11516 /* Expand strlen. */
11518 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11520 rtx addr, scratch1, scratch2, scratch3, scratch4;
11522 /* The generic case of strlen expander is long. Avoid it's
11523 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11525 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11526 && !TARGET_INLINE_ALL_STRINGOPS
11528 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11531 addr = force_reg (Pmode, XEXP (src, 0));
11532 scratch1 = gen_reg_rtx (Pmode);
11534 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11537 /* Well it seems that some optimizer does not combine a call like
11538 foo(strlen(bar), strlen(bar));
11539 when the move and the subtraction is done here. It does calculate
11540 the length just once when these instructions are done inside of
11541 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11542 often used and I use one fewer register for the lifetime of
11543 output_strlen_unroll() this is better. */
11545 emit_move_insn (out, addr);
11547 ix86_expand_strlensi_unroll_1 (out, src, align);
11549 /* strlensi_unroll_1 returns the address of the zero at the end of
11550 the string, like memchr(), so compute the length by subtracting
11551 the start address. */
11553 emit_insn (gen_subdi3 (out, out, addr));
11555 emit_insn (gen_subsi3 (out, out, addr));
11560 scratch2 = gen_reg_rtx (Pmode);
11561 scratch3 = gen_reg_rtx (Pmode);
11562 scratch4 = force_reg (Pmode, constm1_rtx);
11564 emit_move_insn (scratch3, addr);
11565 eoschar = force_reg (QImode, eoschar);
11567 emit_insn (gen_cld ());
11568 src = replace_equiv_address_nv (src, scratch3);
11570 /* If .md starts supporting :P, this can be done in .md. */
11571 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11572 scratch4), UNSPEC_SCAS);
11573 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11576 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11577 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11581 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11582 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11588 /* Expand the appropriate insns for doing strlen if not just doing
11591 out = result, initialized with the start address
11592 align_rtx = alignment of the address.
11593 scratch = scratch register, initialized with the startaddress when
11594 not aligned, otherwise undefined
11596 This is just the body. It needs the initializations mentioned above and
11597 some address computing at the end. These things are done in i386.md. */
11600 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11604 rtx align_2_label = NULL_RTX;
11605 rtx align_3_label = NULL_RTX;
11606 rtx align_4_label = gen_label_rtx ();
11607 rtx end_0_label = gen_label_rtx ();
11609 rtx tmpreg = gen_reg_rtx (SImode);
11610 rtx scratch = gen_reg_rtx (SImode);
11614 if (GET_CODE (align_rtx) == CONST_INT)
11615 align = INTVAL (align_rtx);
11617 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11619 /* Is there a known alignment and is it less than 4? */
11622 rtx scratch1 = gen_reg_rtx (Pmode);
11623 emit_move_insn (scratch1, out);
11624 /* Is there a known alignment and is it not 2? */
11627 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11628 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11630 /* Leave just the 3 lower bits. */
11631 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11632 NULL_RTX, 0, OPTAB_WIDEN);
11634 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11635 Pmode, 1, align_4_label);
11636 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11637 Pmode, 1, align_2_label);
11638 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11639 Pmode, 1, align_3_label);
11643 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11644 check if is aligned to 4 - byte. */
11646 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11647 NULL_RTX, 0, OPTAB_WIDEN);
11649 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11650 Pmode, 1, align_4_label);
11653 mem = change_address (src, QImode, out);
11655 /* Now compare the bytes. */
11657 /* Compare the first n unaligned byte on a byte per byte basis. */
11658 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11659 QImode, 1, end_0_label);
11661 /* Increment the address. */
11663 emit_insn (gen_adddi3 (out, out, const1_rtx));
11665 emit_insn (gen_addsi3 (out, out, const1_rtx));
11667 /* Not needed with an alignment of 2 */
11670 emit_label (align_2_label);
11672 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11676 emit_insn (gen_adddi3 (out, out, const1_rtx));
11678 emit_insn (gen_addsi3 (out, out, const1_rtx));
11680 emit_label (align_3_label);
11683 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11687 emit_insn (gen_adddi3 (out, out, const1_rtx));
11689 emit_insn (gen_addsi3 (out, out, const1_rtx));
11692 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11693 align this loop. It gives only huge programs, but does not help to
11695 emit_label (align_4_label);
11697 mem = change_address (src, SImode, out);
11698 emit_move_insn (scratch, mem);
11700 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11702 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11704 /* This formula yields a nonzero result iff one of the bytes is zero.
11705 This saves three branches inside loop and many cycles. */
11707 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11708 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11709 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11710 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11711 gen_int_mode (0x80808080, SImode)));
11712 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11717 rtx reg = gen_reg_rtx (SImode);
11718 rtx reg2 = gen_reg_rtx (Pmode);
11719 emit_move_insn (reg, tmpreg);
11720 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11722 /* If zero is not in the first two bytes, move two bytes forward. */
11723 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11724 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11725 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11726 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11727 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11730 /* Emit lea manually to avoid clobbering of flags. */
11731 emit_insn (gen_rtx_SET (SImode, reg2,
11732 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11734 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11735 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11736 emit_insn (gen_rtx_SET (VOIDmode, out,
11737 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11744 rtx end_2_label = gen_label_rtx ();
11745 /* Is zero in the first two bytes? */
11747 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11748 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11749 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11750 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11751 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11753 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11754 JUMP_LABEL (tmp) = end_2_label;
11756 /* Not in the first two. Move two bytes forward. */
11757 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11759 emit_insn (gen_adddi3 (out, out, const2_rtx));
11761 emit_insn (gen_addsi3 (out, out, const2_rtx));
11763 emit_label (end_2_label);
11767 /* Avoid branch in fixing the byte. */
11768 tmpreg = gen_lowpart (QImode, tmpreg);
11769 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11770 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11772 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11774 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11776 emit_label (end_0_label);
11780 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11781 rtx callarg2 ATTRIBUTE_UNUSED,
11782 rtx pop, int sibcall)
11784 rtx use = NULL, call;
11786 if (pop == const0_rtx)
11788 if (TARGET_64BIT && pop)
11792 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11793 fnaddr = machopic_indirect_call_target (fnaddr);
11795 /* Static functions and indirect calls don't need the pic register. */
11796 if (! TARGET_64BIT && flag_pic
11797 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11798 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11799 use_reg (&use, pic_offset_table_rtx);
11801 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11803 rtx al = gen_rtx_REG (QImode, 0);
11804 emit_move_insn (al, callarg2);
11805 use_reg (&use, al);
11807 #endif /* TARGET_MACHO */
11809 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11811 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11812 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11814 if (sibcall && TARGET_64BIT
11815 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11818 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11819 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11820 emit_move_insn (fnaddr, addr);
11821 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11824 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11826 call = gen_rtx_SET (VOIDmode, retval, call);
11829 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11830 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11831 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11834 call = emit_call_insn (call);
11836 CALL_INSN_FUNCTION_USAGE (call) = use;
11840 /* Clear stack slot assignments remembered from previous functions.
11841 This is called from INIT_EXPANDERS once before RTL is emitted for each
11844 static struct machine_function *
11845 ix86_init_machine_status (void)
11847 struct machine_function *f;
11849 f = ggc_alloc_cleared (sizeof (struct machine_function));
11850 f->use_fast_prologue_epilogue_nregs = -1;
11855 /* Return a MEM corresponding to a stack slot with mode MODE.
11856 Allocate a new slot if necessary.
11858 The RTL for a function can have several slots available: N is
11859 which slot to use. */
11862 assign_386_stack_local (enum machine_mode mode, int n)
11864 struct stack_local_entry *s;
11866 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11869 for (s = ix86_stack_locals; s; s = s->next)
11870 if (s->mode == mode && s->n == n)
11873 s = (struct stack_local_entry *)
11874 ggc_alloc (sizeof (struct stack_local_entry));
11877 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11879 s->next = ix86_stack_locals;
11880 ix86_stack_locals = s;
11884 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11886 static GTY(()) rtx ix86_tls_symbol;
11888 ix86_tls_get_addr (void)
11891 if (!ix86_tls_symbol)
11893 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11894 (TARGET_GNU_TLS && !TARGET_64BIT)
11895 ? "___tls_get_addr"
11896 : "__tls_get_addr");
11899 return ix86_tls_symbol;
11902 /* Calculate the length of the memory address in the instruction
11903 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11906 memory_address_length (rtx addr)
11908 struct ix86_address parts;
11909 rtx base, index, disp;
11912 if (GET_CODE (addr) == PRE_DEC
11913 || GET_CODE (addr) == POST_INC
11914 || GET_CODE (addr) == PRE_MODIFY
11915 || GET_CODE (addr) == POST_MODIFY)
11918 if (! ix86_decompose_address (addr, &parts))
11922 index = parts.index;
11927 - esp as the base always wants an index,
11928 - ebp as the base always wants a displacement. */
11930 /* Register Indirect. */
11931 if (base && !index && !disp)
11933 /* esp (for its index) and ebp (for its displacement) need
11934 the two-byte modrm form. */
11935 if (addr == stack_pointer_rtx
11936 || addr == arg_pointer_rtx
11937 || addr == frame_pointer_rtx
11938 || addr == hard_frame_pointer_rtx)
11942 /* Direct Addressing. */
11943 else if (disp && !base && !index)
11948 /* Find the length of the displacement constant. */
11951 if (GET_CODE (disp) == CONST_INT
11952 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11958 /* ebp always wants a displacement. */
11959 else if (base == hard_frame_pointer_rtx)
11962 /* An index requires the two-byte modrm form.... */
11964 /* ...like esp, which always wants an index. */
11965 || base == stack_pointer_rtx
11966 || base == arg_pointer_rtx
11967 || base == frame_pointer_rtx)
11974 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11975 is set, expect that insn have 8bit immediate alternative. */
11977 ix86_attr_length_immediate_default (rtx insn, int shortform)
11981 extract_insn_cached (insn);
11982 for (i = recog_data.n_operands - 1; i >= 0; --i)
11983 if (CONSTANT_P (recog_data.operand[i]))
11988 && GET_CODE (recog_data.operand[i]) == CONST_INT
11989 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11993 switch (get_attr_mode (insn))
12004 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12009 fatal_insn ("unknown insn mode", insn);
12015 /* Compute default value for "length_address" attribute. */
12017 ix86_attr_length_address_default (rtx insn)
12021 if (get_attr_type (insn) == TYPE_LEA)
12023 rtx set = PATTERN (insn);
12024 if (GET_CODE (set) == SET)
12026 else if (GET_CODE (set) == PARALLEL
12027 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12028 set = XVECEXP (set, 0, 0);
12031 #ifdef ENABLE_CHECKING
12037 return memory_address_length (SET_SRC (set));
12040 extract_insn_cached (insn);
12041 for (i = recog_data.n_operands - 1; i >= 0; --i)
12042 if (GET_CODE (recog_data.operand[i]) == MEM)
12044 return memory_address_length (XEXP (recog_data.operand[i], 0));
12050 /* Return the maximum number of instructions a cpu can issue. */
12053 ix86_issue_rate (void)
12057 case PROCESSOR_PENTIUM:
12061 case PROCESSOR_PENTIUMPRO:
12062 case PROCESSOR_PENTIUM4:
12063 case PROCESSOR_ATHLON:
12072 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12073 by DEP_INSN and nothing set by DEP_INSN. */
12076 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12080 /* Simplify the test for uninteresting insns. */
12081 if (insn_type != TYPE_SETCC
12082 && insn_type != TYPE_ICMOV
12083 && insn_type != TYPE_FCMOV
12084 && insn_type != TYPE_IBR)
12087 if ((set = single_set (dep_insn)) != 0)
12089 set = SET_DEST (set);
12092 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12093 && XVECLEN (PATTERN (dep_insn), 0) == 2
12094 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12095 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12097 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12098 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12103 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12106 /* This test is true if the dependent insn reads the flags but
12107 not any other potentially set register. */
12108 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12111 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12117 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12118 address with operands set by DEP_INSN. */
12121 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12125 if (insn_type == TYPE_LEA
12128 addr = PATTERN (insn);
12129 if (GET_CODE (addr) == SET)
12131 else if (GET_CODE (addr) == PARALLEL
12132 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12133 addr = XVECEXP (addr, 0, 0);
12136 addr = SET_SRC (addr);
12141 extract_insn_cached (insn);
12142 for (i = recog_data.n_operands - 1; i >= 0; --i)
12143 if (GET_CODE (recog_data.operand[i]) == MEM)
12145 addr = XEXP (recog_data.operand[i], 0);
12152 return modified_in_p (addr, dep_insn);
12156 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12158 enum attr_type insn_type, dep_insn_type;
12159 enum attr_memory memory, dep_memory;
12161 int dep_insn_code_number;
12163 /* Anti and output dependencies have zero cost on all CPUs. */
12164 if (REG_NOTE_KIND (link) != 0)
12167 dep_insn_code_number = recog_memoized (dep_insn);
12169 /* If we can't recognize the insns, we can't really do anything. */
12170 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12173 insn_type = get_attr_type (insn);
12174 dep_insn_type = get_attr_type (dep_insn);
12178 case PROCESSOR_PENTIUM:
12179 /* Address Generation Interlock adds a cycle of latency. */
12180 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12183 /* ??? Compares pair with jump/setcc. */
12184 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12187 /* Floating point stores require value to be ready one cycle earlier. */
12188 if (insn_type == TYPE_FMOV
12189 && get_attr_memory (insn) == MEMORY_STORE
12190 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12194 case PROCESSOR_PENTIUMPRO:
12195 memory = get_attr_memory (insn);
12196 dep_memory = get_attr_memory (dep_insn);
12198 /* Since we can't represent delayed latencies of load+operation,
12199 increase the cost here for non-imov insns. */
12200 if (dep_insn_type != TYPE_IMOV
12201 && dep_insn_type != TYPE_FMOV
12202 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12205 /* INT->FP conversion is expensive. */
12206 if (get_attr_fp_int_src (dep_insn))
12209 /* There is one cycle extra latency between an FP op and a store. */
12210 if (insn_type == TYPE_FMOV
12211 && (set = single_set (dep_insn)) != NULL_RTX
12212 && (set2 = single_set (insn)) != NULL_RTX
12213 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12214 && GET_CODE (SET_DEST (set2)) == MEM)
12217 /* Show ability of reorder buffer to hide latency of load by executing
12218 in parallel with previous instruction in case
12219 previous instruction is not needed to compute the address. */
12220 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12221 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12223 /* Claim moves to take one cycle, as core can issue one load
12224 at time and the next load can start cycle later. */
12225 if (dep_insn_type == TYPE_IMOV
12226 || dep_insn_type == TYPE_FMOV)
12234 memory = get_attr_memory (insn);
12235 dep_memory = get_attr_memory (dep_insn);
12236 /* The esp dependency is resolved before the instruction is really
12238 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12239 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12242 /* Since we can't represent delayed latencies of load+operation,
12243 increase the cost here for non-imov insns. */
12244 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12245 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12247 /* INT->FP conversion is expensive. */
12248 if (get_attr_fp_int_src (dep_insn))
12251 /* Show ability of reorder buffer to hide latency of load by executing
12252 in parallel with previous instruction in case
12253 previous instruction is not needed to compute the address. */
12254 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12255 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12257 /* Claim moves to take one cycle, as core can issue one load
12258 at time and the next load can start cycle later. */
12259 if (dep_insn_type == TYPE_IMOV
12260 || dep_insn_type == TYPE_FMOV)
12269 case PROCESSOR_ATHLON:
12271 memory = get_attr_memory (insn);
12272 dep_memory = get_attr_memory (dep_insn);
12274 /* Show ability of reorder buffer to hide latency of load by executing
12275 in parallel with previous instruction in case
12276 previous instruction is not needed to compute the address. */
12277 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12278 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12280 enum attr_unit unit = get_attr_unit (insn);
12283 /* Because of the difference between the length of integer and
12284 floating unit pipeline preparation stages, the memory operands
12285 for floating point are cheaper.
12287 ??? For Athlon it the difference is most probably 2. */
12288 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12291 loadcost = TARGET_ATHLON ? 2 : 0;
12293 if (cost >= loadcost)
12308 struct ppro_sched_data
12311 int issued_this_cycle;
12315 static enum attr_ppro_uops
12316 ix86_safe_ppro_uops (rtx insn)
12318 if (recog_memoized (insn) >= 0)
12319 return get_attr_ppro_uops (insn);
12321 return PPRO_UOPS_MANY;
12325 ix86_dump_ppro_packet (FILE *dump)
12327 if (ix86_sched_data.ppro.decode[0])
12329 fprintf (dump, "PPRO packet: %d",
12330 INSN_UID (ix86_sched_data.ppro.decode[0]));
12331 if (ix86_sched_data.ppro.decode[1])
12332 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12333 if (ix86_sched_data.ppro.decode[2])
12334 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12335 fputc ('\n', dump);
12339 /* We're beginning a new block. Initialize data structures as necessary. */
12342 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12343 int sched_verbose ATTRIBUTE_UNUSED,
12344 int veclen ATTRIBUTE_UNUSED)
12346 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12349 /* Shift INSN to SLOT, and shift everything else down. */
12352 ix86_reorder_insn (rtx *insnp, rtx *slot)
12358 insnp[0] = insnp[1];
12359 while (++insnp != slot);
12365 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12368 enum attr_ppro_uops cur_uops;
12369 int issued_this_cycle;
12373 /* At this point .ppro.decode contains the state of the three
12374 decoders from last "cycle". That is, those insns that were
12375 actually independent. But here we're scheduling for the
12376 decoder, and we may find things that are decodable in the
12379 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12380 issued_this_cycle = 0;
12383 cur_uops = ix86_safe_ppro_uops (*insnp);
12385 /* If the decoders are empty, and we've a complex insn at the
12386 head of the priority queue, let it issue without complaint. */
12387 if (decode[0] == NULL)
12389 if (cur_uops == PPRO_UOPS_MANY)
12391 decode[0] = *insnp;
12395 /* Otherwise, search for a 2-4 uop unsn to issue. */
12396 while (cur_uops != PPRO_UOPS_FEW)
12398 if (insnp == ready)
12400 cur_uops = ix86_safe_ppro_uops (*--insnp);
12403 /* If so, move it to the head of the line. */
12404 if (cur_uops == PPRO_UOPS_FEW)
12405 ix86_reorder_insn (insnp, e_ready);
12407 /* Issue the head of the queue. */
12408 issued_this_cycle = 1;
12409 decode[0] = *e_ready--;
12412 /* Look for simple insns to fill in the other two slots. */
12413 for (i = 1; i < 3; ++i)
12414 if (decode[i] == NULL)
12416 if (ready > e_ready)
12420 cur_uops = ix86_safe_ppro_uops (*insnp);
12421 while (cur_uops != PPRO_UOPS_ONE)
12423 if (insnp == ready)
12425 cur_uops = ix86_safe_ppro_uops (*--insnp);
12428 /* Found one. Move it to the head of the queue and issue it. */
12429 if (cur_uops == PPRO_UOPS_ONE)
12431 ix86_reorder_insn (insnp, e_ready);
12432 decode[i] = *e_ready--;
12433 issued_this_cycle++;
12437 /* ??? Didn't find one. Ideally, here we would do a lazy split
12438 of 2-uop insns, issue one and queue the other. */
12442 if (issued_this_cycle == 0)
12443 issued_this_cycle = 1;
12444 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12447 /* We are about to being issuing insns for this clock cycle.
12448 Override the default sort algorithm to better slot instructions. */
12450 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12451 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12452 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12454 int n_ready = *n_readyp;
12455 rtx *e_ready = ready + n_ready - 1;
12457 /* Make sure to go ahead and initialize key items in
12458 ix86_sched_data if we are not going to bother trying to
12459 reorder the ready queue. */
12462 ix86_sched_data.ppro.issued_this_cycle = 1;
12471 case PROCESSOR_PENTIUMPRO:
12472 ix86_sched_reorder_ppro (ready, e_ready);
12477 return ix86_issue_rate ();
12480 /* We are about to issue INSN. Return the number of insns left on the
12481 ready queue that can be issued this cycle. */
12484 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12485 int can_issue_more)
12491 return can_issue_more - 1;
12493 case PROCESSOR_PENTIUMPRO:
12495 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12497 if (uops == PPRO_UOPS_MANY)
12500 ix86_dump_ppro_packet (dump);
12501 ix86_sched_data.ppro.decode[0] = insn;
12502 ix86_sched_data.ppro.decode[1] = NULL;
12503 ix86_sched_data.ppro.decode[2] = NULL;
12505 ix86_dump_ppro_packet (dump);
12506 ix86_sched_data.ppro.decode[0] = NULL;
12508 else if (uops == PPRO_UOPS_FEW)
12511 ix86_dump_ppro_packet (dump);
12512 ix86_sched_data.ppro.decode[0] = insn;
12513 ix86_sched_data.ppro.decode[1] = NULL;
12514 ix86_sched_data.ppro.decode[2] = NULL;
12518 for (i = 0; i < 3; ++i)
12519 if (ix86_sched_data.ppro.decode[i] == NULL)
12521 ix86_sched_data.ppro.decode[i] = insn;
12529 ix86_dump_ppro_packet (dump);
12530 ix86_sched_data.ppro.decode[0] = NULL;
12531 ix86_sched_data.ppro.decode[1] = NULL;
12532 ix86_sched_data.ppro.decode[2] = NULL;
12536 return --ix86_sched_data.ppro.issued_this_cycle;
12541 ia32_use_dfa_pipeline_interface (void)
12543 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12548 /* How many alternative schedules to try. This should be as wide as the
12549 scheduling freedom in the DFA, but no wider. Making this value too
12550 large results extra work for the scheduler. */
12553 ia32_multipass_dfa_lookahead (void)
12555 if (ix86_tune == PROCESSOR_PENTIUM)
12562 /* Compute the alignment given to a constant that is being placed in memory.
12563 EXP is the constant and ALIGN is the alignment that the object would
12565 The value of this function is used instead of that alignment to align
12569 ix86_constant_alignment (tree exp, int align)
12571 if (TREE_CODE (exp) == REAL_CST)
12573 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12575 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12578 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12579 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12580 return BITS_PER_WORD;
12585 /* Compute the alignment for a static variable.
12586 TYPE is the data type, and ALIGN is the alignment that
12587 the object would ordinarily have. The value of this function is used
12588 instead of that alignment to align the object. */
12591 ix86_data_alignment (tree type, int align)
12593 if (AGGREGATE_TYPE_P (type)
12594 && TYPE_SIZE (type)
12595 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12596 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12597 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12600 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12601 to 16byte boundary. */
12604 if (AGGREGATE_TYPE_P (type)
12605 && TYPE_SIZE (type)
12606 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12607 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12608 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12612 if (TREE_CODE (type) == ARRAY_TYPE)
12614 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12616 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12619 else if (TREE_CODE (type) == COMPLEX_TYPE)
12622 if (TYPE_MODE (type) == DCmode && align < 64)
12624 if (TYPE_MODE (type) == XCmode && align < 128)
12627 else if ((TREE_CODE (type) == RECORD_TYPE
12628 || TREE_CODE (type) == UNION_TYPE
12629 || TREE_CODE (type) == QUAL_UNION_TYPE)
12630 && TYPE_FIELDS (type))
12632 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12634 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12637 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12638 || TREE_CODE (type) == INTEGER_TYPE)
12640 if (TYPE_MODE (type) == DFmode && align < 64)
12642 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12649 /* Compute the alignment for a local variable.
12650 TYPE is the data type, and ALIGN is the alignment that
12651 the object would ordinarily have. The value of this macro is used
12652 instead of that alignment to align the object. */
12655 ix86_local_alignment (tree type, int align)
12657 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12658 to 16byte boundary. */
12661 if (AGGREGATE_TYPE_P (type)
12662 && TYPE_SIZE (type)
12663 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12664 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12665 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12668 if (TREE_CODE (type) == ARRAY_TYPE)
12670 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12672 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12675 else if (TREE_CODE (type) == COMPLEX_TYPE)
12677 if (TYPE_MODE (type) == DCmode && align < 64)
12679 if (TYPE_MODE (type) == XCmode && align < 128)
12682 else if ((TREE_CODE (type) == RECORD_TYPE
12683 || TREE_CODE (type) == UNION_TYPE
12684 || TREE_CODE (type) == QUAL_UNION_TYPE)
12685 && TYPE_FIELDS (type))
12687 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12689 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12692 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12693 || TREE_CODE (type) == INTEGER_TYPE)
12696 if (TYPE_MODE (type) == DFmode && align < 64)
12698 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12704 /* Emit RTL insns to initialize the variable parts of a trampoline.
12705 FNADDR is an RTX for the address of the function's pure code.
12706 CXT is an RTX for the static chain value for the function. */
12708 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12712 /* Compute offset from the end of the jmp to the target function. */
12713 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12714 plus_constant (tramp, 10),
12715 NULL_RTX, 1, OPTAB_DIRECT);
12716 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12717 gen_int_mode (0xb9, QImode));
12718 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12719 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12720 gen_int_mode (0xe9, QImode));
12721 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12726 /* Try to load address using shorter movl instead of movabs.
12727 We may want to support movq for kernel mode, but kernel does not use
12728 trampolines at the moment. */
12729 if (x86_64_zero_extended_value (fnaddr))
12731 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12732 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12733 gen_int_mode (0xbb41, HImode));
12734 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12735 gen_lowpart (SImode, fnaddr));
12740 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12741 gen_int_mode (0xbb49, HImode));
12742 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12746 /* Load static chain using movabs to r10. */
12747 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12748 gen_int_mode (0xba49, HImode));
12749 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12752 /* Jump to the r11 */
12753 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12754 gen_int_mode (0xff49, HImode));
12755 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12756 gen_int_mode (0xe3, QImode));
12758 if (offset > TRAMPOLINE_SIZE)
12762 #ifdef TRANSFER_FROM_TRAMPOLINE
12763 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12764 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12768 #define def_builtin(MASK, NAME, TYPE, CODE) \
12770 if ((MASK) & target_flags \
12771 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12772 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12773 NULL, NULL_TREE); \
12776 struct builtin_description
12778 const unsigned int mask;
12779 const enum insn_code icode;
12780 const char *const name;
12781 const enum ix86_builtins code;
12782 const enum rtx_code comparison;
12783 const unsigned int flag;
12786 static const struct builtin_description bdesc_comi[] =
12788 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12789 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12790 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12791 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12792 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12793 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12794 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12795 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12796 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12797 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12798 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12799 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12810 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12814 static const struct builtin_description bdesc_2arg[] =
12817 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12818 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12819 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12820 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12821 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12822 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12823 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12824 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12826 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12827 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12828 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12829 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12830 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12831 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12832 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12833 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12834 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12835 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12836 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12837 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12838 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12839 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12840 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12841 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12842 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12843 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12844 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12845 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12847 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12848 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12849 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12850 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12852 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12853 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12854 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12855 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12857 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12858 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12859 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12860 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12861 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12864 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12865 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12866 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12867 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12868 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12869 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12870 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12871 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12873 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12874 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12875 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12876 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12877 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12878 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12879 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12880 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12882 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12883 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12884 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12886 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12887 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12888 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12889 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12891 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12892 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12894 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12895 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12896 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12897 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12898 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12899 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12903 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12904 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12906 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12908 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12909 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12910 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12914 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12915 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12916 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12918 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12919 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12920 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12922 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12923 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12924 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12925 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12926 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12927 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12929 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12930 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12931 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12932 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12934 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12936 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12937 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12938 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12939 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12941 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12942 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12955 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12956 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12957 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12958 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12959 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12960 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12961 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12962 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12963 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12964 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12965 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12966 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12967 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12968 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12969 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12970 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12971 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12972 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12973 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12975 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12999 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13000 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13001 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13002 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13003 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13004 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13005 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13006 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13070 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13071 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13072 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13073 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13076 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13077 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13078 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13079 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13080 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13081 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13084 static const struct builtin_description bdesc_1arg[] =
13086 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13087 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13089 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13090 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13091 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13093 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13094 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13095 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13096 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13097 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13098 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13100 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13105 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13113 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13118 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13120 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13121 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13127 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13130 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13131 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13132 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13136 ix86_init_builtins (void)
13139 ix86_init_mmx_sse_builtins ();
13142 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13143 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13146 ix86_init_mmx_sse_builtins (void)
13148 const struct builtin_description * d;
13151 tree pchar_type_node = build_pointer_type (char_type_node);
13152 tree pcchar_type_node = build_pointer_type (
13153 build_type_variant (char_type_node, 1, 0));
13154 tree pfloat_type_node = build_pointer_type (float_type_node);
13155 tree pcfloat_type_node = build_pointer_type (
13156 build_type_variant (float_type_node, 1, 0));
13157 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13158 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13159 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13162 tree int_ftype_v4sf_v4sf
13163 = build_function_type_list (integer_type_node,
13164 V4SF_type_node, V4SF_type_node, NULL_TREE);
13165 tree v4si_ftype_v4sf_v4sf
13166 = build_function_type_list (V4SI_type_node,
13167 V4SF_type_node, V4SF_type_node, NULL_TREE);
13168 /* MMX/SSE/integer conversions. */
13169 tree int_ftype_v4sf
13170 = build_function_type_list (integer_type_node,
13171 V4SF_type_node, NULL_TREE);
13172 tree int64_ftype_v4sf
13173 = build_function_type_list (long_long_integer_type_node,
13174 V4SF_type_node, NULL_TREE);
13175 tree int_ftype_v8qi
13176 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13177 tree v4sf_ftype_v4sf_int
13178 = build_function_type_list (V4SF_type_node,
13179 V4SF_type_node, integer_type_node, NULL_TREE);
13180 tree v4sf_ftype_v4sf_int64
13181 = build_function_type_list (V4SF_type_node,
13182 V4SF_type_node, long_long_integer_type_node,
13184 tree v4sf_ftype_v4sf_v2si
13185 = build_function_type_list (V4SF_type_node,
13186 V4SF_type_node, V2SI_type_node, NULL_TREE);
13187 tree int_ftype_v4hi_int
13188 = build_function_type_list (integer_type_node,
13189 V4HI_type_node, integer_type_node, NULL_TREE);
13190 tree v4hi_ftype_v4hi_int_int
13191 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13192 integer_type_node, integer_type_node,
13194 /* Miscellaneous. */
13195 tree v8qi_ftype_v4hi_v4hi
13196 = build_function_type_list (V8QI_type_node,
13197 V4HI_type_node, V4HI_type_node, NULL_TREE);
13198 tree v4hi_ftype_v2si_v2si
13199 = build_function_type_list (V4HI_type_node,
13200 V2SI_type_node, V2SI_type_node, NULL_TREE);
13201 tree v4sf_ftype_v4sf_v4sf_int
13202 = build_function_type_list (V4SF_type_node,
13203 V4SF_type_node, V4SF_type_node,
13204 integer_type_node, NULL_TREE);
13205 tree v2si_ftype_v4hi_v4hi
13206 = build_function_type_list (V2SI_type_node,
13207 V4HI_type_node, V4HI_type_node, NULL_TREE);
13208 tree v4hi_ftype_v4hi_int
13209 = build_function_type_list (V4HI_type_node,
13210 V4HI_type_node, integer_type_node, NULL_TREE);
13211 tree v4hi_ftype_v4hi_di
13212 = build_function_type_list (V4HI_type_node,
13213 V4HI_type_node, long_long_unsigned_type_node,
13215 tree v2si_ftype_v2si_di
13216 = build_function_type_list (V2SI_type_node,
13217 V2SI_type_node, long_long_unsigned_type_node,
13219 tree void_ftype_void
13220 = build_function_type (void_type_node, void_list_node);
13221 tree void_ftype_unsigned
13222 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13223 tree void_ftype_unsigned_unsigned
13224 = build_function_type_list (void_type_node, unsigned_type_node,
13225 unsigned_type_node, NULL_TREE);
13226 tree void_ftype_pcvoid_unsigned_unsigned
13227 = build_function_type_list (void_type_node, const_ptr_type_node,
13228 unsigned_type_node, unsigned_type_node,
13230 tree unsigned_ftype_void
13231 = build_function_type (unsigned_type_node, void_list_node);
13233 = build_function_type (long_long_unsigned_type_node, void_list_node);
13234 tree v4sf_ftype_void
13235 = build_function_type (V4SF_type_node, void_list_node);
13236 tree v2si_ftype_v4sf
13237 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13238 /* Loads/stores. */
13239 tree void_ftype_v8qi_v8qi_pchar
13240 = build_function_type_list (void_type_node,
13241 V8QI_type_node, V8QI_type_node,
13242 pchar_type_node, NULL_TREE);
13243 tree v4sf_ftype_pcfloat
13244 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13245 /* @@@ the type is bogus */
13246 tree v4sf_ftype_v4sf_pv2si
13247 = build_function_type_list (V4SF_type_node,
13248 V4SF_type_node, pv2si_type_node, NULL_TREE);
13249 tree void_ftype_pv2si_v4sf
13250 = build_function_type_list (void_type_node,
13251 pv2si_type_node, V4SF_type_node, NULL_TREE);
13252 tree void_ftype_pfloat_v4sf
13253 = build_function_type_list (void_type_node,
13254 pfloat_type_node, V4SF_type_node, NULL_TREE);
13255 tree void_ftype_pdi_di
13256 = build_function_type_list (void_type_node,
13257 pdi_type_node, long_long_unsigned_type_node,
13259 tree void_ftype_pv2di_v2di
13260 = build_function_type_list (void_type_node,
13261 pv2di_type_node, V2DI_type_node, NULL_TREE);
13262 /* Normal vector unops. */
13263 tree v4sf_ftype_v4sf
13264 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13266 /* Normal vector binops. */
13267 tree v4sf_ftype_v4sf_v4sf
13268 = build_function_type_list (V4SF_type_node,
13269 V4SF_type_node, V4SF_type_node, NULL_TREE);
13270 tree v8qi_ftype_v8qi_v8qi
13271 = build_function_type_list (V8QI_type_node,
13272 V8QI_type_node, V8QI_type_node, NULL_TREE);
13273 tree v4hi_ftype_v4hi_v4hi
13274 = build_function_type_list (V4HI_type_node,
13275 V4HI_type_node, V4HI_type_node, NULL_TREE);
13276 tree v2si_ftype_v2si_v2si
13277 = build_function_type_list (V2SI_type_node,
13278 V2SI_type_node, V2SI_type_node, NULL_TREE);
13279 tree di_ftype_di_di
13280 = build_function_type_list (long_long_unsigned_type_node,
13281 long_long_unsigned_type_node,
13282 long_long_unsigned_type_node, NULL_TREE);
13284 tree v2si_ftype_v2sf
13285 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13286 tree v2sf_ftype_v2si
13287 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13288 tree v2si_ftype_v2si
13289 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13290 tree v2sf_ftype_v2sf
13291 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13292 tree v2sf_ftype_v2sf_v2sf
13293 = build_function_type_list (V2SF_type_node,
13294 V2SF_type_node, V2SF_type_node, NULL_TREE);
13295 tree v2si_ftype_v2sf_v2sf
13296 = build_function_type_list (V2SI_type_node,
13297 V2SF_type_node, V2SF_type_node, NULL_TREE);
13298 tree pint_type_node = build_pointer_type (integer_type_node);
13299 tree pcint_type_node = build_pointer_type (
13300 build_type_variant (integer_type_node, 1, 0));
13301 tree pdouble_type_node = build_pointer_type (double_type_node);
13302 tree pcdouble_type_node = build_pointer_type (
13303 build_type_variant (double_type_node, 1, 0));
13304 tree int_ftype_v2df_v2df
13305 = build_function_type_list (integer_type_node,
13306 V2DF_type_node, V2DF_type_node, NULL_TREE);
13309 = build_function_type (intTI_type_node, void_list_node);
13310 tree v2di_ftype_void
13311 = build_function_type (V2DI_type_node, void_list_node);
13312 tree ti_ftype_ti_ti
13313 = build_function_type_list (intTI_type_node,
13314 intTI_type_node, intTI_type_node, NULL_TREE);
13315 tree void_ftype_pcvoid
13316 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13318 = build_function_type_list (V2DI_type_node,
13319 long_long_unsigned_type_node, NULL_TREE);
13321 = build_function_type_list (long_long_unsigned_type_node,
13322 V2DI_type_node, NULL_TREE);
13323 tree v4sf_ftype_v4si
13324 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13325 tree v4si_ftype_v4sf
13326 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13327 tree v2df_ftype_v4si
13328 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13329 tree v4si_ftype_v2df
13330 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13331 tree v2si_ftype_v2df
13332 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13333 tree v4sf_ftype_v2df
13334 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13335 tree v2df_ftype_v2si
13336 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13337 tree v2df_ftype_v4sf
13338 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13339 tree int_ftype_v2df
13340 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13341 tree int64_ftype_v2df
13342 = build_function_type_list (long_long_integer_type_node,
13343 V2DF_type_node, NULL_TREE);
13344 tree v2df_ftype_v2df_int
13345 = build_function_type_list (V2DF_type_node,
13346 V2DF_type_node, integer_type_node, NULL_TREE);
13347 tree v2df_ftype_v2df_int64
13348 = build_function_type_list (V2DF_type_node,
13349 V2DF_type_node, long_long_integer_type_node,
13351 tree v4sf_ftype_v4sf_v2df
13352 = build_function_type_list (V4SF_type_node,
13353 V4SF_type_node, V2DF_type_node, NULL_TREE);
13354 tree v2df_ftype_v2df_v4sf
13355 = build_function_type_list (V2DF_type_node,
13356 V2DF_type_node, V4SF_type_node, NULL_TREE);
13357 tree v2df_ftype_v2df_v2df_int
13358 = build_function_type_list (V2DF_type_node,
13359 V2DF_type_node, V2DF_type_node,
13362 tree v2df_ftype_v2df_pv2si
13363 = build_function_type_list (V2DF_type_node,
13364 V2DF_type_node, pv2si_type_node, NULL_TREE);
13365 tree void_ftype_pv2si_v2df
13366 = build_function_type_list (void_type_node,
13367 pv2si_type_node, V2DF_type_node, NULL_TREE);
13368 tree void_ftype_pdouble_v2df
13369 = build_function_type_list (void_type_node,
13370 pdouble_type_node, V2DF_type_node, NULL_TREE);
13371 tree void_ftype_pint_int
13372 = build_function_type_list (void_type_node,
13373 pint_type_node, integer_type_node, NULL_TREE);
13374 tree void_ftype_v16qi_v16qi_pchar
13375 = build_function_type_list (void_type_node,
13376 V16QI_type_node, V16QI_type_node,
13377 pchar_type_node, NULL_TREE);
13378 tree v2df_ftype_pcdouble
13379 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13380 tree v2df_ftype_v2df_v2df
13381 = build_function_type_list (V2DF_type_node,
13382 V2DF_type_node, V2DF_type_node, NULL_TREE);
13383 tree v16qi_ftype_v16qi_v16qi
13384 = build_function_type_list (V16QI_type_node,
13385 V16QI_type_node, V16QI_type_node, NULL_TREE);
13386 tree v8hi_ftype_v8hi_v8hi
13387 = build_function_type_list (V8HI_type_node,
13388 V8HI_type_node, V8HI_type_node, NULL_TREE);
13389 tree v4si_ftype_v4si_v4si
13390 = build_function_type_list (V4SI_type_node,
13391 V4SI_type_node, V4SI_type_node, NULL_TREE);
13392 tree v2di_ftype_v2di_v2di
13393 = build_function_type_list (V2DI_type_node,
13394 V2DI_type_node, V2DI_type_node, NULL_TREE);
13395 tree v2di_ftype_v2df_v2df
13396 = build_function_type_list (V2DI_type_node,
13397 V2DF_type_node, V2DF_type_node, NULL_TREE);
13398 tree v2df_ftype_v2df
13399 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13400 tree v2df_ftype_double
13401 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13402 tree v2df_ftype_double_double
13403 = build_function_type_list (V2DF_type_node,
13404 double_type_node, double_type_node, NULL_TREE);
13405 tree int_ftype_v8hi_int
13406 = build_function_type_list (integer_type_node,
13407 V8HI_type_node, integer_type_node, NULL_TREE);
13408 tree v8hi_ftype_v8hi_int_int
13409 = build_function_type_list (V8HI_type_node,
13410 V8HI_type_node, integer_type_node,
13411 integer_type_node, NULL_TREE);
13412 tree v2di_ftype_v2di_int
13413 = build_function_type_list (V2DI_type_node,
13414 V2DI_type_node, integer_type_node, NULL_TREE);
13415 tree v4si_ftype_v4si_int
13416 = build_function_type_list (V4SI_type_node,
13417 V4SI_type_node, integer_type_node, NULL_TREE);
13418 tree v8hi_ftype_v8hi_int
13419 = build_function_type_list (V8HI_type_node,
13420 V8HI_type_node, integer_type_node, NULL_TREE);
13421 tree v8hi_ftype_v8hi_v2di
13422 = build_function_type_list (V8HI_type_node,
13423 V8HI_type_node, V2DI_type_node, NULL_TREE);
13424 tree v4si_ftype_v4si_v2di
13425 = build_function_type_list (V4SI_type_node,
13426 V4SI_type_node, V2DI_type_node, NULL_TREE);
13427 tree v4si_ftype_v8hi_v8hi
13428 = build_function_type_list (V4SI_type_node,
13429 V8HI_type_node, V8HI_type_node, NULL_TREE);
13430 tree di_ftype_v8qi_v8qi
13431 = build_function_type_list (long_long_unsigned_type_node,
13432 V8QI_type_node, V8QI_type_node, NULL_TREE);
13433 tree v2di_ftype_v16qi_v16qi
13434 = build_function_type_list (V2DI_type_node,
13435 V16QI_type_node, V16QI_type_node, NULL_TREE);
13436 tree int_ftype_v16qi
13437 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13438 tree v16qi_ftype_pcchar
13439 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13440 tree void_ftype_pchar_v16qi
13441 = build_function_type_list (void_type_node,
13442 pchar_type_node, V16QI_type_node, NULL_TREE);
13443 tree v4si_ftype_pcint
13444 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13445 tree void_ftype_pcint_v4si
13446 = build_function_type_list (void_type_node,
13447 pcint_type_node, V4SI_type_node, NULL_TREE);
13448 tree v2di_ftype_v2di
13449 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13452 tree float128_type;
13454 /* The __float80 type. */
13455 if (TYPE_MODE (long_double_type_node) == XFmode)
13456 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13460 /* The __float80 type. */
13461 float80_type = make_node (REAL_TYPE);
13462 TYPE_PRECISION (float80_type) = 96;
13463 layout_type (float80_type);
13464 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13467 float128_type = make_node (REAL_TYPE);
13468 TYPE_PRECISION (float128_type) = 128;
13469 layout_type (float128_type);
13470 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13472 /* Add all builtins that are more or less simple operations on two
13474 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13476 /* Use one of the operands; the target can have a different mode for
13477 mask-generating compares. */
13478 enum machine_mode mode;
13483 mode = insn_data[d->icode].operand[1].mode;
13488 type = v16qi_ftype_v16qi_v16qi;
13491 type = v8hi_ftype_v8hi_v8hi;
13494 type = v4si_ftype_v4si_v4si;
13497 type = v2di_ftype_v2di_v2di;
13500 type = v2df_ftype_v2df_v2df;
13503 type = ti_ftype_ti_ti;
13506 type = v4sf_ftype_v4sf_v4sf;
13509 type = v8qi_ftype_v8qi_v8qi;
13512 type = v4hi_ftype_v4hi_v4hi;
13515 type = v2si_ftype_v2si_v2si;
13518 type = di_ftype_di_di;
13525 /* Override for comparisons. */
13526 if (d->icode == CODE_FOR_maskcmpv4sf3
13527 || d->icode == CODE_FOR_maskncmpv4sf3
13528 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13529 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13530 type = v4si_ftype_v4sf_v4sf;
13532 if (d->icode == CODE_FOR_maskcmpv2df3
13533 || d->icode == CODE_FOR_maskncmpv2df3
13534 || d->icode == CODE_FOR_vmmaskcmpv2df3
13535 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13536 type = v2di_ftype_v2df_v2df;
13538 def_builtin (d->mask, d->name, type, d->code);
13541 /* Add the remaining MMX insns with somewhat more complicated types. */
13542 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13543 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13544 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13545 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13546 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13548 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13549 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13550 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13552 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13553 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13555 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13556 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13558 /* comi/ucomi insns. */
13559 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13560 if (d->mask == MASK_SSE2)
13561 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13563 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13565 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13566 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13567 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13569 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13570 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13571 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13572 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13573 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13574 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13575 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13576 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13577 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13578 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13579 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13581 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13582 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13584 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13586 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13587 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13588 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13589 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13590 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13591 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13593 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13594 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13595 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13596 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13598 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13599 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13600 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13601 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13603 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13605 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13607 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13608 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13609 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13610 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13611 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13612 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13614 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13616 /* Original 3DNow! */
13617 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13618 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13619 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13620 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13621 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13622 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13623 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13624 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13625 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13626 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13635 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13636 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13638 /* 3DNow! extension as used in the Athlon CPU. */
13639 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13640 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13641 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13642 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13643 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13644 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13646 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13679 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13697 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13698 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13704 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13705 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13709 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13714 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13718 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13721 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13727 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13729 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13735 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13739 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13740 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13750 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13752 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13753 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13755 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13757 /* Prescott New Instructions. */
13758 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13759 void_ftype_pcvoid_unsigned_unsigned,
13760 IX86_BUILTIN_MONITOR);
13761 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13762 void_ftype_unsigned_unsigned,
13763 IX86_BUILTIN_MWAIT);
13764 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13766 IX86_BUILTIN_MOVSHDUP);
13767 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13769 IX86_BUILTIN_MOVSLDUP);
13770 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13771 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13772 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13773 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13774 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13775 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13778 /* Errors in the source file can cause expand_expr to return const0_rtx
13779 where we expect a vector. To avoid crashing, use one of the vector
13780 clear instructions. */
13782 safe_vector_operand (rtx x, enum machine_mode mode)
13784 if (x != const0_rtx)
13786 x = gen_reg_rtx (mode);
13788 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13789 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13790 : gen_rtx_SUBREG (DImode, x, 0)));
13792 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13793 : gen_rtx_SUBREG (V4SFmode, x, 0),
13794 CONST0_RTX (V4SFmode)));
13798 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13801 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13804 tree arg0 = TREE_VALUE (arglist);
13805 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13806 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13807 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13808 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13809 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13810 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13812 if (VECTOR_MODE_P (mode0))
13813 op0 = safe_vector_operand (op0, mode0);
13814 if (VECTOR_MODE_P (mode1))
13815 op1 = safe_vector_operand (op1, mode1);
13818 || GET_MODE (target) != tmode
13819 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13820 target = gen_reg_rtx (tmode);
13822 if (GET_MODE (op1) == SImode && mode1 == TImode)
13824 rtx x = gen_reg_rtx (V4SImode);
13825 emit_insn (gen_sse2_loadd (x, op1));
13826 op1 = gen_lowpart (TImode, x);
13829 /* In case the insn wants input operands in modes different from
13830 the result, abort. */
13831 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13832 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13835 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13836 op0 = copy_to_mode_reg (mode0, op0);
13837 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13838 op1 = copy_to_mode_reg (mode1, op1);
13840 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13841 yet one of the two must not be a memory. This is normally enforced
13842 by expanders, but we didn't bother to create one here. */
13843 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13844 op0 = copy_to_mode_reg (mode0, op0);
13846 pat = GEN_FCN (icode) (target, op0, op1);
13853 /* Subroutine of ix86_expand_builtin to take care of stores. */
13856 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13859 tree arg0 = TREE_VALUE (arglist);
13860 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13861 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13862 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13863 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13864 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13866 if (VECTOR_MODE_P (mode1))
13867 op1 = safe_vector_operand (op1, mode1);
13869 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13870 op1 = copy_to_mode_reg (mode1, op1);
13872 pat = GEN_FCN (icode) (op0, op1);
13878 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13881 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13882 rtx target, int do_load)
13885 tree arg0 = TREE_VALUE (arglist);
13886 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13887 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13888 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13891 || GET_MODE (target) != tmode
13892 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13893 target = gen_reg_rtx (tmode);
13895 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13898 if (VECTOR_MODE_P (mode0))
13899 op0 = safe_vector_operand (op0, mode0);
13901 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13902 op0 = copy_to_mode_reg (mode0, op0);
13905 pat = GEN_FCN (icode) (target, op0);
13912 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13913 sqrtss, rsqrtss, rcpss. */
13916 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13919 tree arg0 = TREE_VALUE (arglist);
13920 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13921 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13922 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13925 || GET_MODE (target) != tmode
13926 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13927 target = gen_reg_rtx (tmode);
13929 if (VECTOR_MODE_P (mode0))
13930 op0 = safe_vector_operand (op0, mode0);
13932 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13933 op0 = copy_to_mode_reg (mode0, op0);
13936 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13937 op1 = copy_to_mode_reg (mode0, op1);
13939 pat = GEN_FCN (icode) (target, op0, op1);
13946 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13949 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13953 tree arg0 = TREE_VALUE (arglist);
13954 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13955 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13956 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13958 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13959 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13960 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13961 enum rtx_code comparison = d->comparison;
13963 if (VECTOR_MODE_P (mode0))
13964 op0 = safe_vector_operand (op0, mode0);
13965 if (VECTOR_MODE_P (mode1))
13966 op1 = safe_vector_operand (op1, mode1);
13968 /* Swap operands if we have a comparison that isn't available in
13972 rtx tmp = gen_reg_rtx (mode1);
13973 emit_move_insn (tmp, op1);
13979 || GET_MODE (target) != tmode
13980 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13981 target = gen_reg_rtx (tmode);
13983 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13984 op0 = copy_to_mode_reg (mode0, op0);
13985 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13986 op1 = copy_to_mode_reg (mode1, op1);
13988 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13989 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13996 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13999 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14003 tree arg0 = TREE_VALUE (arglist);
14004 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14005 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14006 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14008 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14009 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14010 enum rtx_code comparison = d->comparison;
14012 if (VECTOR_MODE_P (mode0))
14013 op0 = safe_vector_operand (op0, mode0);
14014 if (VECTOR_MODE_P (mode1))
14015 op1 = safe_vector_operand (op1, mode1);
14017 /* Swap operands if we have a comparison that isn't available in
14026 target = gen_reg_rtx (SImode);
14027 emit_move_insn (target, const0_rtx);
14028 target = gen_rtx_SUBREG (QImode, target, 0);
14030 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14031 op0 = copy_to_mode_reg (mode0, op0);
14032 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14033 op1 = copy_to_mode_reg (mode1, op1);
14035 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14036 pat = GEN_FCN (d->icode) (op0, op1);
14040 emit_insn (gen_rtx_SET (VOIDmode,
14041 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14042 gen_rtx_fmt_ee (comparison, QImode,
14046 return SUBREG_REG (target);
14049 /* Expand an expression EXP that calls a built-in function,
14050 with result going to TARGET if that's convenient
14051 (and in mode MODE if that's convenient).
14052 SUBTARGET may be used as the target for computing one of EXP's operands.
14053 IGNORE is nonzero if the value is to be ignored. */
14056 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14057 enum machine_mode mode ATTRIBUTE_UNUSED,
14058 int ignore ATTRIBUTE_UNUSED)
14060 const struct builtin_description *d;
14062 enum insn_code icode;
14063 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14064 tree arglist = TREE_OPERAND (exp, 1);
14065 tree arg0, arg1, arg2;
14066 rtx op0, op1, op2, pat;
14067 enum machine_mode tmode, mode0, mode1, mode2;
14068 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14072 case IX86_BUILTIN_EMMS:
14073 emit_insn (gen_emms ());
14076 case IX86_BUILTIN_SFENCE:
14077 emit_insn (gen_sfence ());
14080 case IX86_BUILTIN_PEXTRW:
14081 case IX86_BUILTIN_PEXTRW128:
14082 icode = (fcode == IX86_BUILTIN_PEXTRW
14083 ? CODE_FOR_mmx_pextrw
14084 : CODE_FOR_sse2_pextrw);
14085 arg0 = TREE_VALUE (arglist);
14086 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14087 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14088 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14089 tmode = insn_data[icode].operand[0].mode;
14090 mode0 = insn_data[icode].operand[1].mode;
14091 mode1 = insn_data[icode].operand[2].mode;
14093 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14094 op0 = copy_to_mode_reg (mode0, op0);
14095 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14097 error ("selector must be an integer constant in the range 0..%i",
14098 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14099 return gen_reg_rtx (tmode);
14102 || GET_MODE (target) != tmode
14103 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14104 target = gen_reg_rtx (tmode);
14105 pat = GEN_FCN (icode) (target, op0, op1);
14111 case IX86_BUILTIN_PINSRW:
14112 case IX86_BUILTIN_PINSRW128:
14113 icode = (fcode == IX86_BUILTIN_PINSRW
14114 ? CODE_FOR_mmx_pinsrw
14115 : CODE_FOR_sse2_pinsrw);
14116 arg0 = TREE_VALUE (arglist);
14117 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14118 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14119 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14120 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14121 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14122 tmode = insn_data[icode].operand[0].mode;
14123 mode0 = insn_data[icode].operand[1].mode;
14124 mode1 = insn_data[icode].operand[2].mode;
14125 mode2 = insn_data[icode].operand[3].mode;
14127 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14128 op0 = copy_to_mode_reg (mode0, op0);
14129 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14130 op1 = copy_to_mode_reg (mode1, op1);
14131 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14133 error ("selector must be an integer constant in the range 0..%i",
14134 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14138 || GET_MODE (target) != tmode
14139 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14140 target = gen_reg_rtx (tmode);
14141 pat = GEN_FCN (icode) (target, op0, op1, op2);
14147 case IX86_BUILTIN_MASKMOVQ:
14148 case IX86_BUILTIN_MASKMOVDQU:
14149 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14150 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14151 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14152 : CODE_FOR_sse2_maskmovdqu));
14153 /* Note the arg order is different from the operand order. */
14154 arg1 = TREE_VALUE (arglist);
14155 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14156 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14157 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14158 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14159 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14160 mode0 = insn_data[icode].operand[0].mode;
14161 mode1 = insn_data[icode].operand[1].mode;
14162 mode2 = insn_data[icode].operand[2].mode;
14164 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14165 op0 = copy_to_mode_reg (mode0, op0);
14166 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14167 op1 = copy_to_mode_reg (mode1, op1);
14168 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14169 op2 = copy_to_mode_reg (mode2, op2);
14170 pat = GEN_FCN (icode) (op0, op1, op2);
14176 case IX86_BUILTIN_SQRTSS:
14177 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14178 case IX86_BUILTIN_RSQRTSS:
14179 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14180 case IX86_BUILTIN_RCPSS:
14181 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14183 case IX86_BUILTIN_LOADAPS:
14184 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14186 case IX86_BUILTIN_LOADUPS:
14187 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14189 case IX86_BUILTIN_STOREAPS:
14190 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14192 case IX86_BUILTIN_STOREUPS:
14193 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14195 case IX86_BUILTIN_LOADSS:
14196 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14198 case IX86_BUILTIN_STORESS:
14199 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14201 case IX86_BUILTIN_LOADHPS:
14202 case IX86_BUILTIN_LOADLPS:
14203 case IX86_BUILTIN_LOADHPD:
14204 case IX86_BUILTIN_LOADLPD:
14205 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14206 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14207 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14208 : CODE_FOR_sse2_movsd);
14209 arg0 = TREE_VALUE (arglist);
14210 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14211 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14212 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14213 tmode = insn_data[icode].operand[0].mode;
14214 mode0 = insn_data[icode].operand[1].mode;
14215 mode1 = insn_data[icode].operand[2].mode;
14217 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14218 op0 = copy_to_mode_reg (mode0, op0);
14219 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14221 || GET_MODE (target) != tmode
14222 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14223 target = gen_reg_rtx (tmode);
14224 pat = GEN_FCN (icode) (target, op0, op1);
14230 case IX86_BUILTIN_STOREHPS:
14231 case IX86_BUILTIN_STORELPS:
14232 case IX86_BUILTIN_STOREHPD:
14233 case IX86_BUILTIN_STORELPD:
14234 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14235 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14236 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14237 : CODE_FOR_sse2_movsd);
14238 arg0 = TREE_VALUE (arglist);
14239 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14240 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14241 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14242 mode0 = insn_data[icode].operand[1].mode;
14243 mode1 = insn_data[icode].operand[2].mode;
14245 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14246 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14247 op1 = copy_to_mode_reg (mode1, op1);
14249 pat = GEN_FCN (icode) (op0, op0, op1);
14255 case IX86_BUILTIN_MOVNTPS:
14256 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14257 case IX86_BUILTIN_MOVNTQ:
14258 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14260 case IX86_BUILTIN_LDMXCSR:
14261 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14262 target = assign_386_stack_local (SImode, 0);
14263 emit_move_insn (target, op0);
14264 emit_insn (gen_ldmxcsr (target));
14267 case IX86_BUILTIN_STMXCSR:
14268 target = assign_386_stack_local (SImode, 0);
14269 emit_insn (gen_stmxcsr (target));
14270 return copy_to_mode_reg (SImode, target);
14272 case IX86_BUILTIN_SHUFPS:
14273 case IX86_BUILTIN_SHUFPD:
14274 icode = (fcode == IX86_BUILTIN_SHUFPS
14275 ? CODE_FOR_sse_shufps
14276 : CODE_FOR_sse2_shufpd);
14277 arg0 = TREE_VALUE (arglist);
14278 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14279 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14280 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14281 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14282 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14283 tmode = insn_data[icode].operand[0].mode;
14284 mode0 = insn_data[icode].operand[1].mode;
14285 mode1 = insn_data[icode].operand[2].mode;
14286 mode2 = insn_data[icode].operand[3].mode;
14288 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14289 op0 = copy_to_mode_reg (mode0, op0);
14290 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14291 op1 = copy_to_mode_reg (mode1, op1);
14292 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14294 /* @@@ better error message */
14295 error ("mask must be an immediate");
14296 return gen_reg_rtx (tmode);
14299 || GET_MODE (target) != tmode
14300 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14301 target = gen_reg_rtx (tmode);
14302 pat = GEN_FCN (icode) (target, op0, op1, op2);
14308 case IX86_BUILTIN_PSHUFW:
14309 case IX86_BUILTIN_PSHUFD:
14310 case IX86_BUILTIN_PSHUFHW:
14311 case IX86_BUILTIN_PSHUFLW:
14312 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14313 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14314 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14315 : CODE_FOR_mmx_pshufw);
14316 arg0 = TREE_VALUE (arglist);
14317 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14318 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14319 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14320 tmode = insn_data[icode].operand[0].mode;
14321 mode1 = insn_data[icode].operand[1].mode;
14322 mode2 = insn_data[icode].operand[2].mode;
14324 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14325 op0 = copy_to_mode_reg (mode1, op0);
14326 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14328 /* @@@ better error message */
14329 error ("mask must be an immediate");
14333 || GET_MODE (target) != tmode
14334 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14335 target = gen_reg_rtx (tmode);
14336 pat = GEN_FCN (icode) (target, op0, op1);
14342 case IX86_BUILTIN_PSLLDQI128:
14343 case IX86_BUILTIN_PSRLDQI128:
14344 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14345 : CODE_FOR_sse2_lshrti3);
14346 arg0 = TREE_VALUE (arglist);
14347 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14348 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14349 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14350 tmode = insn_data[icode].operand[0].mode;
14351 mode1 = insn_data[icode].operand[1].mode;
14352 mode2 = insn_data[icode].operand[2].mode;
14354 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14356 op0 = copy_to_reg (op0);
14357 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14359 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14361 error ("shift must be an immediate");
14364 target = gen_reg_rtx (V2DImode);
14365 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14371 case IX86_BUILTIN_FEMMS:
14372 emit_insn (gen_femms ());
14375 case IX86_BUILTIN_PAVGUSB:
14376 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14378 case IX86_BUILTIN_PF2ID:
14379 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14381 case IX86_BUILTIN_PFACC:
14382 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14384 case IX86_BUILTIN_PFADD:
14385 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14387 case IX86_BUILTIN_PFCMPEQ:
14388 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14390 case IX86_BUILTIN_PFCMPGE:
14391 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14393 case IX86_BUILTIN_PFCMPGT:
14394 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14396 case IX86_BUILTIN_PFMAX:
14397 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14399 case IX86_BUILTIN_PFMIN:
14400 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14402 case IX86_BUILTIN_PFMUL:
14403 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14405 case IX86_BUILTIN_PFRCP:
14406 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14408 case IX86_BUILTIN_PFRCPIT1:
14409 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14411 case IX86_BUILTIN_PFRCPIT2:
14412 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14414 case IX86_BUILTIN_PFRSQIT1:
14415 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14417 case IX86_BUILTIN_PFRSQRT:
14418 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14420 case IX86_BUILTIN_PFSUB:
14421 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14423 case IX86_BUILTIN_PFSUBR:
14424 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14426 case IX86_BUILTIN_PI2FD:
14427 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14429 case IX86_BUILTIN_PMULHRW:
14430 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14432 case IX86_BUILTIN_PF2IW:
14433 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14435 case IX86_BUILTIN_PFNACC:
14436 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14438 case IX86_BUILTIN_PFPNACC:
14439 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14441 case IX86_BUILTIN_PI2FW:
14442 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14444 case IX86_BUILTIN_PSWAPDSI:
14445 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14447 case IX86_BUILTIN_PSWAPDSF:
14448 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14450 case IX86_BUILTIN_SSE_ZERO:
14451 target = gen_reg_rtx (V4SFmode);
14452 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14455 case IX86_BUILTIN_MMX_ZERO:
14456 target = gen_reg_rtx (DImode);
14457 emit_insn (gen_mmx_clrdi (target));
14460 case IX86_BUILTIN_CLRTI:
14461 target = gen_reg_rtx (V2DImode);
14462 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14466 case IX86_BUILTIN_SQRTSD:
14467 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14468 case IX86_BUILTIN_LOADAPD:
14469 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14470 case IX86_BUILTIN_LOADUPD:
14471 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14473 case IX86_BUILTIN_STOREAPD:
14474 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14475 case IX86_BUILTIN_STOREUPD:
14476 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14478 case IX86_BUILTIN_LOADSD:
14479 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14481 case IX86_BUILTIN_STORESD:
14482 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14484 case IX86_BUILTIN_SETPD1:
14485 target = assign_386_stack_local (DFmode, 0);
14486 arg0 = TREE_VALUE (arglist);
14487 emit_move_insn (adjust_address (target, DFmode, 0),
14488 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14489 op0 = gen_reg_rtx (V2DFmode);
14490 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14491 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14494 case IX86_BUILTIN_SETPD:
14495 target = assign_386_stack_local (V2DFmode, 0);
14496 arg0 = TREE_VALUE (arglist);
14497 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14498 emit_move_insn (adjust_address (target, DFmode, 0),
14499 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14500 emit_move_insn (adjust_address (target, DFmode, 8),
14501 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14502 op0 = gen_reg_rtx (V2DFmode);
14503 emit_insn (gen_sse2_movapd (op0, target));
14506 case IX86_BUILTIN_LOADRPD:
14507 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14508 gen_reg_rtx (V2DFmode), 1);
14509 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14512 case IX86_BUILTIN_LOADPD1:
14513 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14514 gen_reg_rtx (V2DFmode), 1);
14515 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14518 case IX86_BUILTIN_STOREPD1:
14519 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14520 case IX86_BUILTIN_STORERPD:
14521 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14523 case IX86_BUILTIN_CLRPD:
14524 target = gen_reg_rtx (V2DFmode);
14525 emit_insn (gen_sse_clrv2df (target));
14528 case IX86_BUILTIN_MFENCE:
14529 emit_insn (gen_sse2_mfence ());
14531 case IX86_BUILTIN_LFENCE:
14532 emit_insn (gen_sse2_lfence ());
14535 case IX86_BUILTIN_CLFLUSH:
14536 arg0 = TREE_VALUE (arglist);
14537 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14538 icode = CODE_FOR_sse2_clflush;
14539 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14540 op0 = copy_to_mode_reg (Pmode, op0);
14542 emit_insn (gen_sse2_clflush (op0));
14545 case IX86_BUILTIN_MOVNTPD:
14546 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14547 case IX86_BUILTIN_MOVNTDQ:
14548 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14549 case IX86_BUILTIN_MOVNTI:
14550 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14552 case IX86_BUILTIN_LOADDQA:
14553 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14554 case IX86_BUILTIN_LOADDQU:
14555 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14556 case IX86_BUILTIN_LOADD:
14557 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14559 case IX86_BUILTIN_STOREDQA:
14560 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14561 case IX86_BUILTIN_STOREDQU:
14562 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14563 case IX86_BUILTIN_STORED:
14564 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14566 case IX86_BUILTIN_MONITOR:
14567 arg0 = TREE_VALUE (arglist);
14568 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14569 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14570 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14571 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14572 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14574 op0 = copy_to_mode_reg (SImode, op0);
14576 op1 = copy_to_mode_reg (SImode, op1);
14578 op2 = copy_to_mode_reg (SImode, op2);
14579 emit_insn (gen_monitor (op0, op1, op2));
14582 case IX86_BUILTIN_MWAIT:
14583 arg0 = TREE_VALUE (arglist);
14584 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14585 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14586 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14588 op0 = copy_to_mode_reg (SImode, op0);
14590 op1 = copy_to_mode_reg (SImode, op1);
14591 emit_insn (gen_mwait (op0, op1));
14594 case IX86_BUILTIN_LOADDDUP:
14595 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14597 case IX86_BUILTIN_LDDQU:
14598 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14605 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14606 if (d->code == fcode)
14608 /* Compares are treated specially. */
14609 if (d->icode == CODE_FOR_maskcmpv4sf3
14610 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14611 || d->icode == CODE_FOR_maskncmpv4sf3
14612 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14613 || d->icode == CODE_FOR_maskcmpv2df3
14614 || d->icode == CODE_FOR_vmmaskcmpv2df3
14615 || d->icode == CODE_FOR_maskncmpv2df3
14616 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14617 return ix86_expand_sse_compare (d, arglist, target);
14619 return ix86_expand_binop_builtin (d->icode, arglist, target);
14622 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14623 if (d->code == fcode)
14624 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14626 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14627 if (d->code == fcode)
14628 return ix86_expand_sse_comi (d, arglist, target);
14630 /* @@@ Should really do something sensible here. */
14634 /* Store OPERAND to the memory after reload is completed. This means
14635 that we can't easily use assign_stack_local. */
14637 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14640 if (!reload_completed)
14642 if (TARGET_RED_ZONE)
14644 result = gen_rtx_MEM (mode,
14645 gen_rtx_PLUS (Pmode,
14647 GEN_INT (-RED_ZONE_SIZE)));
14648 emit_move_insn (result, operand);
14650 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14656 operand = gen_lowpart (DImode, operand);
14660 gen_rtx_SET (VOIDmode,
14661 gen_rtx_MEM (DImode,
14662 gen_rtx_PRE_DEC (DImode,
14663 stack_pointer_rtx)),
14669 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14678 split_di (&operand, 1, operands, operands + 1);
14680 gen_rtx_SET (VOIDmode,
14681 gen_rtx_MEM (SImode,
14682 gen_rtx_PRE_DEC (Pmode,
14683 stack_pointer_rtx)),
14686 gen_rtx_SET (VOIDmode,
14687 gen_rtx_MEM (SImode,
14688 gen_rtx_PRE_DEC (Pmode,
14689 stack_pointer_rtx)),
14694 /* It is better to store HImodes as SImodes. */
14695 if (!TARGET_PARTIAL_REG_STALL)
14696 operand = gen_lowpart (SImode, operand);
14700 gen_rtx_SET (VOIDmode,
14701 gen_rtx_MEM (GET_MODE (operand),
14702 gen_rtx_PRE_DEC (SImode,
14703 stack_pointer_rtx)),
14709 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14714 /* Free operand from the memory. */
14716 ix86_free_from_memory (enum machine_mode mode)
14718 if (!TARGET_RED_ZONE)
14722 if (mode == DImode || TARGET_64BIT)
14724 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14728 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14729 to pop or add instruction if registers are available. */
14730 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14731 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14736 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14737 QImode must go into class Q_REGS.
14738 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14739 movdf to do mem-to-mem moves through integer regs. */
14741 ix86_preferred_reload_class (rtx x, enum reg_class class)
14743 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14745 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14747 /* SSE can't load any constant directly yet. */
14748 if (SSE_CLASS_P (class))
14750 /* Floats can load 0 and 1. */
14751 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14753 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14754 if (MAYBE_SSE_CLASS_P (class))
14755 return (reg_class_subset_p (class, GENERAL_REGS)
14756 ? GENERAL_REGS : FLOAT_REGS);
14760 /* General regs can load everything. */
14761 if (reg_class_subset_p (class, GENERAL_REGS))
14762 return GENERAL_REGS;
14763 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14764 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14767 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14769 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14774 /* If we are copying between general and FP registers, we need a memory
14775 location. The same is true for SSE and MMX registers.
14777 The macro can't work reliably when one of the CLASSES is class containing
14778 registers from multiple units (SSE, MMX, integer). We avoid this by never
14779 combining those units in single alternative in the machine description.
14780 Ensure that this constraint holds to avoid unexpected surprises.
14782 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14783 enforce these sanity checks. */
14785 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14786 enum machine_mode mode, int strict)
14788 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14789 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14790 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14791 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14792 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14793 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14800 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14801 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14802 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14803 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14804 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14806 /* Return the cost of moving data from a register in class CLASS1 to
14807 one in class CLASS2.
14809 It is not required that the cost always equal 2 when FROM is the same as TO;
14810 on some machines it is expensive to move between registers if they are not
14811 general registers. */
14813 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14814 enum reg_class class2)
14816 /* In case we require secondary memory, compute cost of the store followed
14817 by load. In order to avoid bad register allocation choices, we need
14818 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14820 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14824 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14825 MEMORY_MOVE_COST (mode, class1, 1));
14826 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14827 MEMORY_MOVE_COST (mode, class2, 1));
14829 /* In case of copying from general_purpose_register we may emit multiple
14830 stores followed by single load causing memory size mismatch stall.
14831 Count this as arbitrarily high cost of 20. */
14832 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14835 /* In the case of FP/MMX moves, the registers actually overlap, and we
14836 have to switch modes in order to treat them differently. */
14837 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14838 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14844 /* Moves between SSE/MMX and integer unit are expensive. */
14845 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14846 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14847 return ix86_cost->mmxsse_to_integer;
14848 if (MAYBE_FLOAT_CLASS_P (class1))
14849 return ix86_cost->fp_move;
14850 if (MAYBE_SSE_CLASS_P (class1))
14851 return ix86_cost->sse_move;
14852 if (MAYBE_MMX_CLASS_P (class1))
14853 return ix86_cost->mmx_move;
14857 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14859 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14861 /* Flags and only flags can only hold CCmode values. */
14862 if (CC_REGNO_P (regno))
14863 return GET_MODE_CLASS (mode) == MODE_CC;
14864 if (GET_MODE_CLASS (mode) == MODE_CC
14865 || GET_MODE_CLASS (mode) == MODE_RANDOM
14866 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14868 if (FP_REGNO_P (regno))
14869 return VALID_FP_MODE_P (mode);
14870 if (SSE_REGNO_P (regno))
14871 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14872 if (MMX_REGNO_P (regno))
14874 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14875 /* We handle both integer and floats in the general purpose registers.
14876 In future we should be able to handle vector modes as well. */
14877 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14879 /* Take care for QImode values - they can be in non-QI regs, but then
14880 they do cause partial register stalls. */
14881 if (regno < 4 || mode != QImode || TARGET_64BIT)
14883 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14886 /* Return the cost of moving data of mode M between a
14887 register and memory. A value of 2 is the default; this cost is
14888 relative to those in `REGISTER_MOVE_COST'.
14890 If moving between registers and memory is more expensive than
14891 between two registers, you should define this macro to express the
14894 Model also increased moving costs of QImode registers in non
14898 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14900 if (FLOAT_CLASS_P (class))
14917 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14919 if (SSE_CLASS_P (class))
14922 switch (GET_MODE_SIZE (mode))
14936 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14938 if (MMX_CLASS_P (class))
14941 switch (GET_MODE_SIZE (mode))
14952 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14954 switch (GET_MODE_SIZE (mode))
14958 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14959 : ix86_cost->movzbl_load);
14961 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14962 : ix86_cost->int_store[0] + 4);
14965 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14967 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14968 if (mode == TFmode)
14970 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14971 * (((int) GET_MODE_SIZE (mode)
14972 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14976 /* Compute a (partial) cost for rtx X. Return true if the complete
14977 cost has been computed, and false if subexpressions should be
14978 scanned. In either case, *TOTAL contains the cost result. */
14981 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14983 enum machine_mode mode = GET_MODE (x);
14991 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14993 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14995 else if (flag_pic && SYMBOLIC_CONST (x)
14997 || (!GET_CODE (x) != LABEL_REF
14998 && (GET_CODE (x) != SYMBOL_REF
14999 || !SYMBOL_REF_LOCAL_P (x)))))
15006 if (mode == VOIDmode)
15009 switch (standard_80387_constant_p (x))
15014 default: /* Other constants */
15019 /* Start with (MEM (SYMBOL_REF)), since that's where
15020 it'll probably end up. Add a penalty for size. */
15021 *total = (COSTS_N_INSNS (1)
15022 + (flag_pic != 0 && !TARGET_64BIT)
15023 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15029 /* The zero extensions is often completely free on x86_64, so make
15030 it as cheap as possible. */
15031 if (TARGET_64BIT && mode == DImode
15032 && GET_MODE (XEXP (x, 0)) == SImode)
15034 else if (TARGET_ZERO_EXTEND_WITH_AND)
15035 *total = COSTS_N_INSNS (ix86_cost->add);
15037 *total = COSTS_N_INSNS (ix86_cost->movzx);
15041 *total = COSTS_N_INSNS (ix86_cost->movsx);
15045 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15046 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15048 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15051 *total = COSTS_N_INSNS (ix86_cost->add);
15054 if ((value == 2 || value == 3)
15055 && !TARGET_DECOMPOSE_LEA
15056 && ix86_cost->lea <= ix86_cost->shift_const)
15058 *total = COSTS_N_INSNS (ix86_cost->lea);
15068 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15070 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15072 if (INTVAL (XEXP (x, 1)) > 32)
15073 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15075 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15079 if (GET_CODE (XEXP (x, 1)) == AND)
15080 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15082 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15087 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15088 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15090 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15095 if (FLOAT_MODE_P (mode))
15096 *total = COSTS_N_INSNS (ix86_cost->fmul);
15097 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15099 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15102 for (nbits = 0; value != 0; value >>= 1)
15105 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15106 + nbits * ix86_cost->mult_bit);
15110 /* This is arbitrary */
15111 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15112 + 7 * ix86_cost->mult_bit);
15120 if (FLOAT_MODE_P (mode))
15121 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15123 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15127 if (FLOAT_MODE_P (mode))
15128 *total = COSTS_N_INSNS (ix86_cost->fadd);
15129 else if (!TARGET_DECOMPOSE_LEA
15130 && GET_MODE_CLASS (mode) == MODE_INT
15131 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15133 if (GET_CODE (XEXP (x, 0)) == PLUS
15134 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15135 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15136 && CONSTANT_P (XEXP (x, 1)))
15138 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15139 if (val == 2 || val == 4 || val == 8)
15141 *total = COSTS_N_INSNS (ix86_cost->lea);
15142 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15143 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15145 *total += rtx_cost (XEXP (x, 1), outer_code);
15149 else if (GET_CODE (XEXP (x, 0)) == MULT
15150 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15152 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15153 if (val == 2 || val == 4 || val == 8)
15155 *total = COSTS_N_INSNS (ix86_cost->lea);
15156 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15157 *total += rtx_cost (XEXP (x, 1), outer_code);
15161 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15163 *total = COSTS_N_INSNS (ix86_cost->lea);
15164 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15165 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15166 *total += rtx_cost (XEXP (x, 1), outer_code);
15173 if (FLOAT_MODE_P (mode))
15175 *total = COSTS_N_INSNS (ix86_cost->fadd);
15183 if (!TARGET_64BIT && mode == DImode)
15185 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15186 + (rtx_cost (XEXP (x, 0), outer_code)
15187 << (GET_MODE (XEXP (x, 0)) != DImode))
15188 + (rtx_cost (XEXP (x, 1), outer_code)
15189 << (GET_MODE (XEXP (x, 1)) != DImode)));
15195 if (FLOAT_MODE_P (mode))
15197 *total = COSTS_N_INSNS (ix86_cost->fchs);
15203 if (!TARGET_64BIT && mode == DImode)
15204 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15206 *total = COSTS_N_INSNS (ix86_cost->add);
15210 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15215 if (FLOAT_MODE_P (mode))
15216 *total = COSTS_N_INSNS (ix86_cost->fabs);
15220 if (FLOAT_MODE_P (mode))
15221 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15225 if (XINT (x, 1) == UNSPEC_TP)
15234 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15236 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15239 fputs ("\tpushl $", asm_out_file);
15240 assemble_name (asm_out_file, XSTR (symbol, 0));
15241 fputc ('\n', asm_out_file);
15247 static int current_machopic_label_num;
15249 /* Given a symbol name and its associated stub, write out the
15250 definition of the stub. */
15253 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15255 unsigned int length;
15256 char *binder_name, *symbol_name, lazy_ptr_name[32];
15257 int label = ++current_machopic_label_num;
15259 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15260 symb = (*targetm.strip_name_encoding) (symb);
15262 length = strlen (stub);
15263 binder_name = alloca (length + 32);
15264 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15266 length = strlen (symb);
15267 symbol_name = alloca (length + 32);
15268 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15270 sprintf (lazy_ptr_name, "L%d$lz", label);
15273 machopic_picsymbol_stub_section ();
15275 machopic_symbol_stub_section ();
15277 fprintf (file, "%s:\n", stub);
15278 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15282 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15283 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15284 fprintf (file, "\tjmp %%edx\n");
15287 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15289 fprintf (file, "%s:\n", binder_name);
15293 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15294 fprintf (file, "\tpushl %%eax\n");
15297 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15299 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15301 machopic_lazy_symbol_ptr_section ();
15302 fprintf (file, "%s:\n", lazy_ptr_name);
15303 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15304 fprintf (file, "\t.long %s\n", binder_name);
15306 #endif /* TARGET_MACHO */
15308 /* Order the registers for register allocator. */
15311 x86_order_regs_for_local_alloc (void)
15316 /* First allocate the local general purpose registers. */
15317 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15318 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15319 reg_alloc_order [pos++] = i;
15321 /* Global general purpose registers. */
15322 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15323 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15324 reg_alloc_order [pos++] = i;
15326 /* x87 registers come first in case we are doing FP math
15328 if (!TARGET_SSE_MATH)
15329 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15330 reg_alloc_order [pos++] = i;
15332 /* SSE registers. */
15333 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15334 reg_alloc_order [pos++] = i;
15335 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15336 reg_alloc_order [pos++] = i;
15338 /* x87 registers. */
15339 if (TARGET_SSE_MATH)
15340 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15341 reg_alloc_order [pos++] = i;
15343 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15344 reg_alloc_order [pos++] = i;
15346 /* Initialize the rest of array as we do not allocate some registers
15348 while (pos < FIRST_PSEUDO_REGISTER)
15349 reg_alloc_order [pos++] = 0;
15352 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15353 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15356 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15357 struct attribute_spec.handler. */
15359 ix86_handle_struct_attribute (tree *node, tree name,
15360 tree args ATTRIBUTE_UNUSED,
15361 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15364 if (DECL_P (*node))
15366 if (TREE_CODE (*node) == TYPE_DECL)
15367 type = &TREE_TYPE (*node);
15372 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15373 || TREE_CODE (*type) == UNION_TYPE)))
15375 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15376 *no_add_attrs = true;
15379 else if ((is_attribute_p ("ms_struct", name)
15380 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15381 || ((is_attribute_p ("gcc_struct", name)
15382 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15384 warning ("`%s' incompatible attribute ignored",
15385 IDENTIFIER_POINTER (name));
15386 *no_add_attrs = true;
15393 ix86_ms_bitfield_layout_p (tree record_type)
15395 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15396 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15397 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15400 /* Returns an expression indicating where the this parameter is
15401 located on entry to the FUNCTION. */
15404 x86_this_parameter (tree function)
15406 tree type = TREE_TYPE (function);
15410 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15411 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15414 if (ix86_function_regparm (type, function) > 0)
15418 parm = TYPE_ARG_TYPES (type);
15419 /* Figure out whether or not the function has a variable number of
15421 for (; parm; parm = TREE_CHAIN (parm))
15422 if (TREE_VALUE (parm) == void_type_node)
15424 /* If not, the this parameter is in the first argument. */
15428 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15430 return gen_rtx_REG (SImode, regno);
15434 if (aggregate_value_p (TREE_TYPE (type), type))
15435 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15437 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15440 /* Determine whether x86_output_mi_thunk can succeed. */
15443 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15444 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15445 HOST_WIDE_INT vcall_offset, tree function)
15447 /* 64-bit can handle anything. */
15451 /* For 32-bit, everything's fine if we have one free register. */
15452 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15455 /* Need a free register for vcall_offset. */
15459 /* Need a free register for GOT references. */
15460 if (flag_pic && !(*targetm.binds_local_p) (function))
15463 /* Otherwise ok. */
15467 /* Output the assembler code for a thunk function. THUNK_DECL is the
15468 declaration for the thunk function itself, FUNCTION is the decl for
15469 the target function. DELTA is an immediate constant offset to be
15470 added to THIS. If VCALL_OFFSET is nonzero, the word at
15471 *(*this + vcall_offset) should be added to THIS. */
15474 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15475 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15476 HOST_WIDE_INT vcall_offset, tree function)
15479 rtx this = x86_this_parameter (function);
15482 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15483 pull it in now and let DELTA benefit. */
15486 else if (vcall_offset)
15488 /* Put the this parameter into %eax. */
15490 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15491 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15494 this_reg = NULL_RTX;
15496 /* Adjust the this parameter by a fixed constant. */
15499 xops[0] = GEN_INT (delta);
15500 xops[1] = this_reg ? this_reg : this;
15503 if (!x86_64_general_operand (xops[0], DImode))
15505 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15507 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15511 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15514 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15517 /* Adjust the this parameter by a value stored in the vtable. */
15521 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15524 int tmp_regno = 2 /* ECX */;
15525 if (lookup_attribute ("fastcall",
15526 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15527 tmp_regno = 0 /* EAX */;
15528 tmp = gen_rtx_REG (SImode, tmp_regno);
15531 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15534 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15536 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15538 /* Adjust the this parameter. */
15539 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15540 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15542 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15543 xops[0] = GEN_INT (vcall_offset);
15545 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15546 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15548 xops[1] = this_reg;
15550 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15552 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15555 /* If necessary, drop THIS back to its stack slot. */
15556 if (this_reg && this_reg != this)
15558 xops[0] = this_reg;
15560 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15563 xops[0] = XEXP (DECL_RTL (function), 0);
15566 if (!flag_pic || (*targetm.binds_local_p) (function))
15567 output_asm_insn ("jmp\t%P0", xops);
15570 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15571 tmp = gen_rtx_CONST (Pmode, tmp);
15572 tmp = gen_rtx_MEM (QImode, tmp);
15574 output_asm_insn ("jmp\t%A0", xops);
15579 if (!flag_pic || (*targetm.binds_local_p) (function))
15580 output_asm_insn ("jmp\t%P0", xops);
15585 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15586 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15587 tmp = gen_rtx_MEM (QImode, tmp);
15589 output_asm_insn ("jmp\t%0", xops);
15592 #endif /* TARGET_MACHO */
15594 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15595 output_set_got (tmp);
15598 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15599 output_asm_insn ("jmp\t{*}%1", xops);
15605 x86_file_start (void)
15607 default_file_start ();
15608 if (X86_FILE_START_VERSION_DIRECTIVE)
15609 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15610 if (X86_FILE_START_FLTUSED)
15611 fputs ("\t.global\t__fltused\n", asm_out_file);
15612 if (ix86_asm_dialect == ASM_INTEL)
15613 fputs ("\t.intel_syntax\n", asm_out_file);
15617 x86_field_alignment (tree field, int computed)
15619 enum machine_mode mode;
15620 tree type = TREE_TYPE (field);
15622 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15624 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15625 ? get_inner_array_type (type) : type);
15626 if (mode == DFmode || mode == DCmode
15627 || GET_MODE_CLASS (mode) == MODE_INT
15628 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15629 return MIN (32, computed);
15633 /* Output assembler code to FILE to increment profiler label # LABELNO
15634 for profiling a function entry. */
15636 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15641 #ifndef NO_PROFILE_COUNTERS
15642 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15644 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15648 #ifndef NO_PROFILE_COUNTERS
15649 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15651 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15655 #ifndef NO_PROFILE_COUNTERS
15656 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15657 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15659 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15663 #ifndef NO_PROFILE_COUNTERS
15664 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15665 PROFILE_COUNT_REGISTER);
15667 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15671 /* We don't have exact information about the insn sizes, but we may assume
15672 quite safely that we are informed about all 1 byte insns and memory
15673 address sizes. This is enough to eliminate unnecessary padding in
15677 min_insn_size (rtx insn)
15681 if (!INSN_P (insn) || !active_insn_p (insn))
15684 /* Discard alignments we've emit and jump instructions. */
15685 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15686 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15688 if (GET_CODE (insn) == JUMP_INSN
15689 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15690 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15693 /* Important case - calls are always 5 bytes.
15694 It is common to have many calls in the row. */
15695 if (GET_CODE (insn) == CALL_INSN
15696 && symbolic_reference_mentioned_p (PATTERN (insn))
15697 && !SIBLING_CALL_P (insn))
15699 if (get_attr_length (insn) <= 1)
15702 /* For normal instructions we may rely on the sizes of addresses
15703 and the presence of symbol to require 4 bytes of encoding.
15704 This is not the case for jumps where references are PC relative. */
15705 if (GET_CODE (insn) != JUMP_INSN)
15707 l = get_attr_length_address (insn);
15708 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15717 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15721 ix86_avoid_jump_misspredicts (void)
15723 rtx insn, start = get_insns ();
15724 int nbytes = 0, njumps = 0;
15727 /* Look for all minimal intervals of instructions containing 4 jumps.
15728 The intervals are bounded by START and INSN. NBYTES is the total
15729 size of instructions in the interval including INSN and not including
15730 START. When the NBYTES is smaller than 16 bytes, it is possible
15731 that the end of START and INSN ends up in the same 16byte page.
15733 The smallest offset in the page INSN can start is the case where START
15734 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15735 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15737 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15740 nbytes += min_insn_size (insn);
15742 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15743 INSN_UID (insn), min_insn_size (insn));
15744 if ((GET_CODE (insn) == JUMP_INSN
15745 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15746 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15747 || GET_CODE (insn) == CALL_INSN)
15754 start = NEXT_INSN (start);
15755 if ((GET_CODE (start) == JUMP_INSN
15756 && GET_CODE (PATTERN (start)) != ADDR_VEC
15757 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15758 || GET_CODE (start) == CALL_INSN)
15759 njumps--, isjump = 1;
15762 nbytes -= min_insn_size (start);
15767 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15768 INSN_UID (start), INSN_UID (insn), nbytes);
15770 if (njumps == 3 && isjump && nbytes < 16)
15772 int padsize = 15 - nbytes + min_insn_size (insn);
15775 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15776 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15781 /* AMD Athlon works faster
15782 when RET is not destination of conditional jump or directly preceded
15783 by other jump instruction. We avoid the penalty by inserting NOP just
15784 before the RET instructions in such cases. */
15786 ix86_pad_returns (void)
15790 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15792 basic_block bb = e->src;
15793 rtx ret = BB_END (bb);
15795 bool replace = false;
15797 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15798 || !maybe_hot_bb_p (bb))
15800 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15801 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15803 if (prev && GET_CODE (prev) == CODE_LABEL)
15806 for (e = bb->pred; e; e = e->pred_next)
15807 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15808 && !(e->flags & EDGE_FALLTHRU))
15813 prev = prev_active_insn (ret);
15815 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15816 || GET_CODE (prev) == CALL_INSN))
15818 /* Empty functions get branch mispredict even when the jump destination
15819 is not visible to us. */
15820 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15825 emit_insn_before (gen_return_internal_long (), ret);
15831 /* Implement machine specific optimizations. We implement padding of returns
15832 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15836 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15837 ix86_pad_returns ();
15838 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15839 ix86_avoid_jump_misspredicts ();
15842 /* Return nonzero when QImode register that must be represented via REX prefix
15845 x86_extended_QIreg_mentioned_p (rtx insn)
15848 extract_insn_cached (insn);
15849 for (i = 0; i < recog_data.n_operands; i++)
15850 if (REG_P (recog_data.operand[i])
15851 && REGNO (recog_data.operand[i]) >= 4)
15856 /* Return nonzero when P points to register encoded via REX prefix.
15857 Called via for_each_rtx. */
15859 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15861 unsigned int regno;
15864 regno = REGNO (*p);
15865 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15868 /* Return true when INSN mentions register that must be encoded using REX
15871 x86_extended_reg_mentioned_p (rtx insn)
15873 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15876 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15877 optabs would emit if we didn't have TFmode patterns. */
15880 x86_emit_floatuns (rtx operands[2])
15882 rtx neglab, donelab, i0, i1, f0, in, out;
15883 enum machine_mode mode, inmode;
15885 inmode = GET_MODE (operands[1]);
15886 if (inmode != SImode
15887 && inmode != DImode)
15891 in = force_reg (inmode, operands[1]);
15892 mode = GET_MODE (out);
15893 neglab = gen_label_rtx ();
15894 donelab = gen_label_rtx ();
15895 i1 = gen_reg_rtx (Pmode);
15896 f0 = gen_reg_rtx (mode);
15898 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15900 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15901 emit_jump_insn (gen_jump (donelab));
15904 emit_label (neglab);
15906 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15907 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15908 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15909 expand_float (f0, i0, 0);
15910 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15912 emit_label (donelab);
15915 /* Return if we do not know how to pass TYPE solely in registers. */
15917 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15919 if (default_must_pass_in_stack (mode, type))
15921 return (!TARGET_64BIT && type && mode == TImode);
15924 /* Initialize vector TARGET via VALS. */
15926 ix86_expand_vector_init (rtx target, rtx vals)
15928 enum machine_mode mode = GET_MODE (target);
15929 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15930 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15933 for (i = n_elts - 1; i >= 0; i--)
15934 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15935 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15938 /* Few special cases first...
15939 ... constants are best loaded from constant pool. */
15942 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15946 /* ... values where only first field is non-constant are best loaded
15947 from the pool and overwriten via move later. */
15950 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15951 GET_MODE_INNER (mode), 0);
15953 op = force_reg (mode, op);
15954 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15955 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15956 switch (GET_MODE (target))
15959 emit_insn (gen_sse2_movsd (target, target, op));
15962 emit_insn (gen_sse_movss (target, target, op));
15970 /* And the busy sequence doing rotations. */
15971 switch (GET_MODE (target))
15976 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15978 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15980 vecop0 = force_reg (V2DFmode, vecop0);
15981 vecop1 = force_reg (V2DFmode, vecop1);
15982 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15988 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15990 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15992 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15994 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15995 rtx tmp1 = gen_reg_rtx (V4SFmode);
15996 rtx tmp2 = gen_reg_rtx (V4SFmode);
15998 vecop0 = force_reg (V4SFmode, vecop0);
15999 vecop1 = force_reg (V4SFmode, vecop1);
16000 vecop2 = force_reg (V4SFmode, vecop2);
16001 vecop3 = force_reg (V4SFmode, vecop3);
16002 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16003 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16004 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16012 #include "gt-i386.h"