1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of storing fp registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of storing fp registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of storing fp registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of storing fp registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of storing fp registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of storing fp registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of storing fp registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
527 /* Some CPU cores are not able to predict more than 4 branch instructions in
528 the 16 byte window. */
529 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4;
531 /* In case the average insn count for single function invocation is
532 lower than this constant, emit fast (but longer) prologue and
534 #define FAST_PROLOGUE_INSN_COUNT 20
536 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
537 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
538 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
539 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
541 /* Array of the smallest class containing reg number REGNO, indexed by
542 REGNO. Used by REGNO_REG_CLASS in i386.h. */
544 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
547 AREG, DREG, CREG, BREG,
549 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
551 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
552 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
555 /* flags, fpsr, dirflag, frame */
556 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
557 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
559 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
563 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
567 /* The "default" register map used in 32bit mode. */
569 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
571 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
572 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
573 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
574 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
575 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
580 static int const x86_64_int_parameter_registers[6] =
582 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
583 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
586 static int const x86_64_int_return_registers[4] =
588 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
591 /* The "default" register map used in 64bit mode. */
592 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
594 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
595 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
596 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
597 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
598 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
599 8,9,10,11,12,13,14,15, /* extended integer registers */
600 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
603 /* Define the register numbers to be used in Dwarf debugging information.
604 The SVR4 reference port C compiler uses the following register numbers
605 in its Dwarf output code:
606 0 for %eax (gcc regno = 0)
607 1 for %ecx (gcc regno = 2)
608 2 for %edx (gcc regno = 1)
609 3 for %ebx (gcc regno = 3)
610 4 for %esp (gcc regno = 7)
611 5 for %ebp (gcc regno = 6)
612 6 for %esi (gcc regno = 4)
613 7 for %edi (gcc regno = 5)
614 The following three DWARF register numbers are never generated by
615 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
616 believes these numbers have these meanings.
617 8 for %eip (no gcc equivalent)
618 9 for %eflags (gcc regno = 17)
619 10 for %trapno (no gcc equivalent)
620 It is not at all clear how we should number the FP stack registers
621 for the x86 architecture. If the version of SDB on x86/svr4 were
622 a bit less brain dead with respect to floating-point then we would
623 have a precedent to follow with respect to DWARF register numbers
624 for x86 FP registers, but the SDB on x86/svr4 is so completely
625 broken with respect to FP registers that it is hardly worth thinking
626 of it as something to strive for compatibility with.
627 The version of x86/svr4 SDB I have at the moment does (partially)
628 seem to believe that DWARF register number 11 is associated with
629 the x86 register %st(0), but that's about all. Higher DWARF
630 register numbers don't seem to be associated with anything in
631 particular, and even for DWARF regno 11, SDB only seems to under-
632 stand that it should say that a variable lives in %st(0) (when
633 asked via an `=' command) if we said it was in DWARF regno 11,
634 but SDB still prints garbage when asked for the value of the
635 variable in question (via a `/' command).
636 (Also note that the labels SDB prints for various FP stack regs
637 when doing an `x' command are all wrong.)
638 Note that these problems generally don't affect the native SVR4
639 C compiler because it doesn't allow the use of -O with -g and
640 because when it is *not* optimizing, it allocates a memory
641 location for each floating-point variable, and the memory
642 location is what gets described in the DWARF AT_location
643 attribute for the variable in question.
644 Regardless of the severe mental illness of the x86/svr4 SDB, we
645 do something sensible here and we use the following DWARF
646 register numbers. Note that these are all stack-top-relative
648 11 for %st(0) (gcc regno = 8)
649 12 for %st(1) (gcc regno = 9)
650 13 for %st(2) (gcc regno = 10)
651 14 for %st(3) (gcc regno = 11)
652 15 for %st(4) (gcc regno = 12)
653 16 for %st(5) (gcc regno = 13)
654 17 for %st(6) (gcc regno = 14)
655 18 for %st(7) (gcc regno = 15)
657 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
659 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
660 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
661 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
662 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
663 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
668 /* Test and compare insns in i386.md store the information needed to
669 generate branch and scc insns here. */
671 rtx ix86_compare_op0 = NULL_RTX;
672 rtx ix86_compare_op1 = NULL_RTX;
674 #define MAX_386_STACK_LOCALS 3
675 /* Size of the register save area. */
676 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
678 /* Define the structure for the machine field in struct function. */
680 struct stack_local_entry GTY(())
685 struct stack_local_entry *next;
688 /* Structure describing stack frame layout.
689 Stack grows downward:
695 saved frame pointer if frame_pointer_needed
696 <- HARD_FRAME_POINTER
702 > to_allocate <- FRAME_POINTER
714 int outgoing_arguments_size;
717 HOST_WIDE_INT to_allocate;
718 /* The offsets relative to ARG_POINTER. */
719 HOST_WIDE_INT frame_pointer_offset;
720 HOST_WIDE_INT hard_frame_pointer_offset;
721 HOST_WIDE_INT stack_pointer_offset;
723 /* When save_regs_using_mov is set, emit prologue using
724 move instead of push instructions. */
725 bool save_regs_using_mov;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
733 enum cmodel ix86_cmodel;
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_tune;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_tune_string; /* for -mtune=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand (rtx, enum machine_mode);
789 static int tls_symbolic_operand_1 (rtx, enum tls_model);
790 static void output_pic_addr_const (FILE *, rtx, int);
791 static void put_condition_code (enum rtx_code, enum machine_mode,
793 static const char *get_some_local_dynamic_name (void);
794 static int get_some_local_dynamic_name_1 (rtx *, void *);
795 static rtx maybe_get_pool_constant (rtx);
796 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
797 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
799 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
800 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
802 static rtx get_thread_pointer (int);
803 static rtx legitimize_tls_address (rtx, enum tls_model, int);
804 static void get_pc_thunk_name (char [32], unsigned int);
805 static rtx gen_push (rtx);
806 static int memory_address_length (rtx addr);
807 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
808 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
809 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
810 static void ix86_dump_ppro_packet (FILE *);
811 static void ix86_reorder_insn (rtx *, rtx *);
812 static struct machine_function * ix86_init_machine_status (void);
813 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
814 static int ix86_nsaved_regs (void);
815 static void ix86_emit_save_regs (void);
816 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
817 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
818 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
819 static void ix86_sched_reorder_ppro (rtx *, rtx *);
820 static HOST_WIDE_INT ix86_GOT_alias_set (void);
821 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
/* Forward declarations for static helpers defined later in this file,
   grouped roughly by topic: string/length expanders, scheduler hooks,
   MI thunks, address handling, SSE builtin expanders, FP-comparison
   costing, frame layout, and attribute handlers.  */
822 static rtx ix86_expand_aligntest (rtx, int);
823 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
824 static int ix86_issue_rate (void);
825 static int ix86_adjust_cost (rtx, rtx, rtx, int);
826 static void ix86_sched_init (FILE *, int, int);
827 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
828 static int ix86_variable_issue (FILE *, int, rtx, int);
829 static int ia32_use_dfa_pipeline_interface (void);
830 static int ia32_multipass_dfa_lookahead (void);
831 static void ix86_init_mmx_sse_builtins (void);
832 static rtx x86_this_parameter (tree);
833 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
834 HOST_WIDE_INT, tree);
835 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
836 static void x86_file_start (void);
837 static void ix86_reorg (void);
838 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
839 static tree ix86_build_builtin_va_list (void);
840 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
/* NOTE(review): the two lines below appear to be the interior of
   struct ix86_address (base/index/disp fields plus segment selector);
   the enclosing declaration lines are not visible in this extract --
   confirm against the full file.  */
845 rtx base, index, disp;
847 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
850 static int ix86_decompose_address (rtx, struct ix86_address *);
851 static int ix86_address_cost (rtx);
852 static bool ix86_cannot_force_const_mem (rtx);
853 static rtx ix86_delegitimize_address (rtx);
/* Opaque forward declaration; the full definition lives with the
   builtin tables later in the file.  */
855 struct builtin_description;
856 static rtx ix86_expand_sse_comi (const struct builtin_description *,
858 static rtx ix86_expand_sse_compare (const struct builtin_description *,
860 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
861 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
862 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
863 static rtx ix86_expand_store_builtin (enum insn_code, tree);
864 static rtx safe_vector_operand (rtx, enum machine_mode);
865 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
866 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
867 enum rtx_code *, enum rtx_code *);
868 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
869 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
870 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
871 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
872 static int ix86_fp_comparison_cost (enum rtx_code code);
873 static unsigned int ix86_select_alt_pic_regnum (void);
874 static int ix86_save_reg (unsigned int, int);
875 static void ix86_compute_frame_layout (struct ix86_frame *);
876 static int ix86_comp_type_attributes (tree, tree);
877 static int ix86_function_regparm (tree, tree);
878 const struct attribute_spec ix86_attribute_table[];
879 static bool ix86_function_ok_for_sibcall (tree, tree);
880 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
881 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
882 static int ix86_value_regno (enum machine_mode);
883 static bool contains_128bit_aligned_vector_p (tree);
884 static bool ix86_ms_bitfield_layout_p (tree);
885 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
886 static int extended_reg_mentioned_1 (rtx *, void *);
887 static bool ix86_rtx_costs (rtx, int, int, int *);
888 static int min_insn_size (rtx);
890 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
891 static void ix86_svr3_asm_out_constructor (rtx, int);
894 /* Register class used for passing given 64bit part of the argument.
895 These represent classes as documented by the PS ABI, with the exception
896 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
897 use SF or DFmode move instead of DImode to avoid reformatting penalties.
899 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
900 whenever possible (upper half does contain padding).
902 enum x86_64_reg_class
/* NOTE(review): only two enumerators of x86_64_reg_class are visible
   here; the name table below implies the full set (NO, INTEGER,
   INTEGERSI, SSE, SSESF, SSEDF, SSEUP, X87, X87UP, MEMORY).  */
905 X86_64_INTEGER_CLASS,
906 X86_64_INTEGERSI_CLASS,
/* Human-readable names indexed by enum x86_64_reg_class, used for
   debug output of the classification.  */
915 static const char * const x86_64_reg_class_name[] =
916 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
/* An argument is classified into at most MAX_CLASSES 8-byte chunks.  */
918 #define MAX_CLASSES 4
919 static int classify_argument (enum machine_mode, tree,
920 enum x86_64_reg_class [MAX_CLASSES], int);
921 static int examine_argument (enum machine_mode, tree, int, int *, int *);
922 static rtx construct_container (enum machine_mode, tree, int, int, int,
924 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
925 enum x86_64_reg_class);
927 /* Table of constants used by fldpi, fldln2, etc.... */
928 static REAL_VALUE_TYPE ext_80387_constants_table [5];
929 static bool ext_80387_constants_init = 0;
930 static void init_ext_80387_constants (void);
932 /* Initialize the GCC target structure. */
933 #undef TARGET_ATTRIBUTE_TABLE
934 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
935 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
936 # undef TARGET_MERGE_DECL_ATTRIBUTES
937 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
940 #undef TARGET_COMP_TYPE_ATTRIBUTES
941 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
942 
943 #undef TARGET_INIT_BUILTINS
944 #define TARGET_INIT_BUILTINS ix86_init_builtins
946 #undef TARGET_EXPAND_BUILTIN
947 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
949 #undef TARGET_ASM_FUNCTION_EPILOGUE
950 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* i386 assemblers need no parentheses around expressions.  */
952 #undef TARGET_ASM_OPEN_PAREN
953 #define TARGET_ASM_OPEN_PAREN ""
954 #undef TARGET_ASM_CLOSE_PAREN
955 #define TARGET_ASM_CLOSE_PAREN ""
/* Integer data directives; x86 has no alignment-sensitive variants,
   so the unaligned ops simply reuse the aligned ones below.  */
957 #undef TARGET_ASM_ALIGNED_HI_OP
958 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
959 #undef TARGET_ASM_ALIGNED_SI_OP
960 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
962 #undef TARGET_ASM_ALIGNED_DI_OP
963 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
966 #undef TARGET_ASM_UNALIGNED_HI_OP
967 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
968 #undef TARGET_ASM_UNALIGNED_SI_OP
969 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
970 #undef TARGET_ASM_UNALIGNED_DI_OP
971 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction scheduler hooks.  */
973 #undef TARGET_SCHED_ADJUST_COST
974 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
975 #undef TARGET_SCHED_ISSUE_RATE
976 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
977 #undef TARGET_SCHED_VARIABLE_ISSUE
978 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
979 #undef TARGET_SCHED_INIT
980 #define TARGET_SCHED_INIT ix86_sched_init
981 #undef TARGET_SCHED_REORDER
982 #define TARGET_SCHED_REORDER ix86_sched_reorder
983 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
984 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
985 ia32_use_dfa_pipeline_interface
986 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
987 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
988 ia32_multipass_dfa_lookahead
990 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
991 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
994 #undef TARGET_HAVE_TLS
995 #define TARGET_HAVE_TLS true
997 #undef TARGET_CANNOT_FORCE_CONST_MEM
998 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1000 #undef TARGET_DELEGITIMIZE_ADDRESS
1001 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1003 #undef TARGET_MS_BITFIELD_LAYOUT_P
1004 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1006 #undef TARGET_ASM_OUTPUT_MI_THUNK
1007 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1008 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1009 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1011 #undef TARGET_ASM_FILE_START
1012 #define TARGET_ASM_FILE_START x86_file_start
1014 #undef TARGET_RTX_COSTS
1015 #define TARGET_RTX_COSTS ix86_rtx_costs
1016 #undef TARGET_ADDRESS_COST
1017 #define TARGET_ADDRESS_COST ix86_address_cost
1019 #undef TARGET_FIXED_CONDITION_CODE_REGS
1020 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1021 #undef TARGET_CC_MODES_COMPATIBLE
1022 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1024 #undef TARGET_MACHINE_DEPENDENT_REORG
1025 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1027 #undef TARGET_BUILD_BUILTIN_VA_LIST
1028 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1030 #undef TARGET_PROMOTE_PROTOTYPES
1031 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1033 #undef TARGET_SETUP_INCOMING_VARARGS
1034 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
/* The one and only definition of the target hook vector.  */
1036 struct gcc_target targetm = TARGET_INITIALIZER;
1038 /* The svr4 ABI for the i386 says that records and unions are returned
1040 #ifndef DEFAULT_PCC_STRUCT_RETURN
1041 #define DEFAULT_PCC_STRUCT_RETURN 1
1044 /* Sometimes certain combinations of command options do not make
1045 sense on a particular target machine. You can define a macro
1046 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1047 defined, is executed once just after all the command options have
1050 Don't use this macro to turn on various extra optimizations for
1051 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1054 override_options (void)
1057 /* Comes from final.c -- no real reason to change it. */
1058 #define MAX_CODE_ALIGN 16
1062 const struct processor_costs *cost; /* Processor costs */
1063 const int target_enable; /* Target flags to enable. */
1064 const int target_disable; /* Target flags to disable. */
1065 const int align_loop; /* Default alignments. */
1066 const int align_loop_max_skip;
1067 const int align_jump;
1068 const int align_jump_max_skip;
1069 const int align_func;
1071 const processor_target_table[PROCESSOR_max] =
1073 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1074 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1075 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1076 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1077 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1078 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1079 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1080 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1083 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1086 const char *const name; /* processor name or nickname. */
1087 const enum processor_type processor;
1088 const enum pta_flags
1093 PTA_PREFETCH_SSE = 8,
1099 const processor_alias_table[] =
1101 {"i386", PROCESSOR_I386, 0},
1102 {"i486", PROCESSOR_I486, 0},
1103 {"i586", PROCESSOR_PENTIUM, 0},
1104 {"pentium", PROCESSOR_PENTIUM, 0},
1105 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1106 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1107 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1108 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1109 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1110 {"i686", PROCESSOR_PENTIUMPRO, 0},
1111 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1112 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1113 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1114 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1115 PTA_MMX | PTA_PREFETCH_SSE},
1116 {"k6", PROCESSOR_K6, PTA_MMX},
1117 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1118 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1119 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1121 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1122 | PTA_3DNOW | PTA_3DNOW_A},
1123 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1124 | PTA_3DNOW_A | PTA_SSE},
1125 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1126 | PTA_3DNOW_A | PTA_SSE},
1127 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1128 | PTA_3DNOW_A | PTA_SSE},
1129 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1130 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1131 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1132 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1133 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1134 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1135 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1136 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1139 int const pta_size = ARRAY_SIZE (processor_alias_table);
1141 /* Set the default values for switches whose default depends on TARGET_64BIT
1142 in case they weren't overwritten by command line options. */
1145 if (flag_omit_frame_pointer == 2)
1146 flag_omit_frame_pointer = 1;
1147 if (flag_asynchronous_unwind_tables == 2)
1148 flag_asynchronous_unwind_tables = 1;
1149 if (flag_pcc_struct_return == 2)
1150 flag_pcc_struct_return = 0;
1154 if (flag_omit_frame_pointer == 2)
1155 flag_omit_frame_pointer = 0;
1156 if (flag_asynchronous_unwind_tables == 2)
1157 flag_asynchronous_unwind_tables = 0;
1158 if (flag_pcc_struct_return == 2)
1159 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1162 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1163 SUBTARGET_OVERRIDE_OPTIONS;
1166 if (!ix86_tune_string && ix86_arch_string)
1167 ix86_tune_string = ix86_arch_string;
1168 if (!ix86_tune_string)
1169 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1170 if (!ix86_arch_string)
1171 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1173 if (ix86_cmodel_string != 0)
1175 if (!strcmp (ix86_cmodel_string, "small"))
1176 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1178 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1179 else if (!strcmp (ix86_cmodel_string, "32"))
1180 ix86_cmodel = CM_32;
1181 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1182 ix86_cmodel = CM_KERNEL;
1183 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1184 ix86_cmodel = CM_MEDIUM;
1185 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1186 ix86_cmodel = CM_LARGE;
1188 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1192 ix86_cmodel = CM_32;
1194 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1196 if (ix86_asm_string != 0)
1198 if (!strcmp (ix86_asm_string, "intel"))
1199 ix86_asm_dialect = ASM_INTEL;
1200 else if (!strcmp (ix86_asm_string, "att"))
1201 ix86_asm_dialect = ASM_ATT;
1203 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1205 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1206 error ("code model `%s' not supported in the %s bit mode",
1207 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1208 if (ix86_cmodel == CM_LARGE)
1209 sorry ("code model `large' not supported yet");
1210 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1211 sorry ("%i-bit mode not compiled in",
1212 (target_flags & MASK_64BIT) ? 64 : 32);
1214 for (i = 0; i < pta_size; i++)
1215 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1217 ix86_arch = processor_alias_table[i].processor;
1218 /* Default cpu tuning to the architecture. */
1219 ix86_tune = ix86_arch;
1220 if (processor_alias_table[i].flags & PTA_MMX
1221 && !(target_flags_explicit & MASK_MMX))
1222 target_flags |= MASK_MMX;
1223 if (processor_alias_table[i].flags & PTA_3DNOW
1224 && !(target_flags_explicit & MASK_3DNOW))
1225 target_flags |= MASK_3DNOW;
1226 if (processor_alias_table[i].flags & PTA_3DNOW_A
1227 && !(target_flags_explicit & MASK_3DNOW_A))
1228 target_flags |= MASK_3DNOW_A;
1229 if (processor_alias_table[i].flags & PTA_SSE
1230 && !(target_flags_explicit & MASK_SSE))
1231 target_flags |= MASK_SSE;
1232 if (processor_alias_table[i].flags & PTA_SSE2
1233 && !(target_flags_explicit & MASK_SSE2))
1234 target_flags |= MASK_SSE2;
1235 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1236 x86_prefetch_sse = true;
1237 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1238 error ("CPU you selected does not support x86-64 instruction set");
1243 error ("bad value (%s) for -march= switch", ix86_arch_string);
1245 for (i = 0; i < pta_size; i++)
1246 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1248 ix86_tune = processor_alias_table[i].processor;
1249 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1250 error ("CPU you selected does not support x86-64 instruction set");
1253 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1254 x86_prefetch_sse = true;
1256 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1259 ix86_cost = &size_cost;
1261 ix86_cost = processor_target_table[ix86_tune].cost;
1262 target_flags |= processor_target_table[ix86_tune].target_enable;
1263 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1265 /* Arrange to set up i386_stack_locals for all functions. */
1266 init_machine_status = ix86_init_machine_status;
1268 /* Validate -mregparm= value. */
1269 if (ix86_regparm_string)
1271 i = atoi (ix86_regparm_string);
1272 if (i < 0 || i > REGPARM_MAX)
1273 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1279 ix86_regparm = REGPARM_MAX;
1281 /* If the user has provided any of the -malign-* options,
1282 warn and use that value only if -falign-* is not set.
1283 Remove this code in GCC 3.2 or later. */
1284 if (ix86_align_loops_string)
1286 warning ("-malign-loops is obsolete, use -falign-loops");
1287 if (align_loops == 0)
1289 i = atoi (ix86_align_loops_string);
1290 if (i < 0 || i > MAX_CODE_ALIGN)
1291 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1293 align_loops = 1 << i;
1297 if (ix86_align_jumps_string)
1299 warning ("-malign-jumps is obsolete, use -falign-jumps");
1300 if (align_jumps == 0)
1302 i = atoi (ix86_align_jumps_string);
1303 if (i < 0 || i > MAX_CODE_ALIGN)
1304 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1306 align_jumps = 1 << i;
1310 if (ix86_align_funcs_string)
1312 warning ("-malign-functions is obsolete, use -falign-functions");
1313 if (align_functions == 0)
1315 i = atoi (ix86_align_funcs_string);
1316 if (i < 0 || i > MAX_CODE_ALIGN)
1317 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1319 align_functions = 1 << i;
1323 /* Default align_* from the processor table. */
1324 if (align_loops == 0)
1326 align_loops = processor_target_table[ix86_tune].align_loop;
1327 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1329 if (align_jumps == 0)
1331 align_jumps = processor_target_table[ix86_tune].align_jump;
1332 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1334 if (align_functions == 0)
1336 align_functions = processor_target_table[ix86_tune].align_func;
1339 /* Validate -mpreferred-stack-boundary= value, or provide default.
1340 The default of 128 bits is for Pentium III's SSE __m128, but we
1341 don't want additional code to keep the stack aligned when
1342 optimizing for code size. */
1343 ix86_preferred_stack_boundary = (optimize_size
1344 ? TARGET_64BIT ? 128 : 32
1346 if (ix86_preferred_stack_boundary_string)
1348 i = atoi (ix86_preferred_stack_boundary_string);
1349 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1350 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1351 TARGET_64BIT ? 4 : 2);
1353 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1356 /* Validate -mbranch-cost= value, or provide default. */
1357 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1358 if (ix86_branch_cost_string)
1360 i = atoi (ix86_branch_cost_string);
1362 error ("-mbranch-cost=%d is not between 0 and 5", i);
1364 ix86_branch_cost = i;
1367 if (ix86_tls_dialect_string)
1369 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1370 ix86_tls_dialect = TLS_DIALECT_GNU;
1371 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1372 ix86_tls_dialect = TLS_DIALECT_SUN;
1374 error ("bad value (%s) for -mtls-dialect= switch",
1375 ix86_tls_dialect_string);
1378 /* Keep nonleaf frame pointers. */
1379 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1380 flag_omit_frame_pointer = 1;
1382 /* If we're doing fast math, we don't care about comparison order
1383 wrt NaNs. This lets us use a shorter comparison sequence. */
1384 if (flag_unsafe_math_optimizations)
1385 target_flags &= ~MASK_IEEE_FP;
1387 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1388 since the insns won't need emulation. */
1389 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1390 target_flags &= ~MASK_NO_FANCY_MATH_387;
1392 /* Turn on SSE2 builtins for -mpni. */
1394 target_flags |= MASK_SSE2;
1396 /* Turn on SSE builtins for -msse2. */
1398 target_flags |= MASK_SSE;
1402 if (TARGET_ALIGN_DOUBLE)
1403 error ("-malign-double makes no sense in the 64bit mode");
1405 error ("-mrtd calling convention not supported in the 64bit mode");
1406 /* Enable by default the SSE and MMX builtins. */
1407 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1408 ix86_fpmath = FPMATH_SSE;
1412 ix86_fpmath = FPMATH_387;
1413 /* i386 ABI does not specify red zone. It still makes sense to use it
1414 when the programmer takes care to keep the stack from being destroyed. */
1415 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1416 target_flags |= MASK_NO_RED_ZONE;
1419 if (ix86_fpmath_string != 0)
1421 if (! strcmp (ix86_fpmath_string, "387"))
1422 ix86_fpmath = FPMATH_387;
1423 else if (! strcmp (ix86_fpmath_string, "sse"))
1427 warning ("SSE instruction set disabled, using 387 arithmetics");
1428 ix86_fpmath = FPMATH_387;
1431 ix86_fpmath = FPMATH_SSE;
1433 else if (! strcmp (ix86_fpmath_string, "387,sse")
1434 || ! strcmp (ix86_fpmath_string, "sse,387"))
1438 warning ("SSE instruction set disabled, using 387 arithmetics");
1439 ix86_fpmath = FPMATH_387;
1441 else if (!TARGET_80387)
1443 warning ("387 instruction set disabled, using SSE arithmetics");
1444 ix86_fpmath = FPMATH_SSE;
1447 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1450 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1453 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1457 target_flags |= MASK_MMX;
1458 x86_prefetch_sse = true;
1461 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1464 target_flags |= MASK_MMX;
1465 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1466 extensions it adds. */
1467 if (x86_3dnow_a & (1 << ix86_arch))
1468 target_flags |= MASK_3DNOW_A;
1470 if ((x86_accumulate_outgoing_args & TUNEMASK)
1471 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1473 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1475 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1478 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1479 p = strchr (internal_label_prefix, 'X');
1480 internal_label_prefix_len = p - internal_label_prefix;
/* OPTIMIZATION_OPTIONS hook: adjust per--O-level defaults.  LEVEL is
   the requested optimization level; SIZE is nonzero for -Os (unused
   here).  Runs before the command line is fully parsed, hence the
   "mark with 2" trick below.  */
1486 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1488 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1489 make the problem with not enough registers even worse. */
1490 #ifdef INSN_SCHEDULING
1492 flag_schedule_insns = 0;
1495 /* The default values of these switches depend on the TARGET_64BIT
1496 that is not known at this moment. Mark these values with 2 and
1497 let the user override these. In case there is no command line option
1498 specifying them, we will set the defaults in override_options. */
1500 flag_omit_frame_pointer = 2;
1501 flag_pcc_struct_return = 2;
1502 flag_asynchronous_unwind_tables = 2;
1505 /* Table of valid machine attributes. */
1506 const struct attribute_spec ix86_attribute_table[] =
1508 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1509 /* Stdcall attribute says callee is responsible for popping arguments
1510 if they are not variable. */
1511 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1512 /* Fastcall attribute says callee is responsible for popping arguments
1513 if they are not variable. */
1514 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1515 /* Cdecl attribute says the callee is a normal C declaration */
1516 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1517 /* Regparm attribute specifies how many integer arguments are to be
1518 passed in registers. */
1519 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
/* DLL import/export and shared-section attributes exist only on
   targets that define TARGET_DLLIMPORT_DECL_ATTRIBUTES.  */
1520 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1521 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1522 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1523 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Per-type struct layout selection; validated by
   ix86_handle_struct_attribute.  */
1525 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1526 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* End-of-table sentinel.  */
1527 { NULL, 0, 0, false, false, false, NULL }
1530 /* Decide whether we can make a sibling call to a function. DECL is the
1531 declaration of the function being targeted by the call and EXP is the
1532 CALL_EXPR representing the call. */
1535 ix86_function_ok_for_sibcall (tree decl, tree exp)
1537 /* If we are generating position-independent code, we cannot sibcall
1538 optimize any indirect call, or a direct call to a global function,
1539 as the PLT requires %ebx be live. */
1540 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1543 /* If we are returning floats on the 80387 register stack, we cannot
1544 make a sibcall from a function that doesn't return a float to a
1545 function that does or, conversely, from a function that does return
1546 a float to a function that doesn't; the necessary stack adjustment
1547 would not be executed. */
1548 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1549 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1552 /* If this call is indirect, we'll need to be able to use a call-clobbered
1553 register for the address of the target function. Make sure that all
1554 such registers are not used for passing parameters. */
1555 if (!decl && !TARGET_64BIT)
1559 /* We're looking at the CALL_EXPR, we need the type of the function. */
1560 type = TREE_OPERAND (exp, 0); /* pointer expression */
1561 type = TREE_TYPE (type); /* pointer type */
1562 type = TREE_TYPE (type); /* function type */
/* With regparm >= 3, every call-clobbered integer register would be
   holding an argument, leaving none free for the target address.  */
1564 if (ix86_function_regparm (type, NULL) >= 3)
1566 /* ??? Need to count the actual number of registers to be used,
1567 not the possible number of registers. Fix later. */
1572 /* Otherwise okay. That also includes certain types of indirect calls. */
1576 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1577 arguments as in struct attribute_spec.handler. */
1579 ix86_handle_cdecl_attribute (tree *node, tree name,
1580 tree args ATTRIBUTE_UNUSED,
1581 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Reject the attribute on anything that is not a function (or a
   declaration whose type can be a function type); warn and drop it.  */
1583 if (TREE_CODE (*node) != FUNCTION_TYPE
1584 && TREE_CODE (*node) != METHOD_TYPE
1585 && TREE_CODE (*node) != FIELD_DECL
1586 && TREE_CODE (*node) != TYPE_DECL)
1588 warning ("`%s' attribute only applies to functions",
1589 IDENTIFIER_POINTER (name));
1590 *no_add_attrs = true;
/* Diagnose mutually exclusive calling-convention combinations:
   fastcall conflicts with both stdcall and regparm.  */
1594 if (is_attribute_p ("fastcall", name))
1596 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1598 error ("fastcall and stdcall attributes are not compatible");
1600 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1602 error ("fastcall and regparm attributes are not compatible");
1605 else if (is_attribute_p ("stdcall", name))
1607 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1609 error ("fastcall and stdcall attributes are not compatible");
/* These conventions are meaningless in 64-bit mode; warn and drop.
   NOTE(review): the guard selecting this branch is not visible in
   this extract -- confirm it tests TARGET_64BIT.  */
1616 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1617 *no_add_attrs = true;
1623 /* Handle a "regparm" attribute;
1624 arguments as in struct attribute_spec.handler. */
1626 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1627 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* The attribute only makes sense on functions; warn and drop it
   elsewhere.  */
1629 if (TREE_CODE (*node) != FUNCTION_TYPE
1630 && TREE_CODE (*node) != METHOD_TYPE
1631 && TREE_CODE (*node) != FIELD_DECL
1632 && TREE_CODE (*node) != TYPE_DECL)
1634 warning ("`%s' attribute only applies to functions",
1635 IDENTIFIER_POINTER (name));
1636 *no_add_attrs = true;
/* Validate the single argument: it must be an integer constant in
   the range 0 .. REGPARM_MAX.  */
1642 cst = TREE_VALUE (args);
1643 if (TREE_CODE (cst) != INTEGER_CST)
1645 warning ("`%s' attribute requires an integer constant argument",
1646 IDENTIFIER_POINTER (name));
1647 *no_add_attrs = true;
1649 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1651 warning ("argument to `%s' attribute larger than %d",
1652 IDENTIFIER_POINTER (name), REGPARM_MAX);
1653 *no_add_attrs = true;
/* regparm is mutually exclusive with fastcall.  */
1656 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1658 error ("fastcall and regparm attributes are not compatible");
1665 /* Return 0 if the attributes for two types are incompatible, 1 if they
1666 are compatible, and 2 if they are nearly compatible (which causes a
1667 warning to be generated). */
1670 ix86_comp_type_attributes (tree type1, tree type2)
1672 /* Check for mismatch of non-default calling convention. */
/* With -mrtd the default convention is stdcall, so the non-default
   one to look for is cdecl; without -mrtd it is stdcall.  */
1673 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function types carry calling-convention attributes.  */
1675 if (TREE_CODE (type1) != FUNCTION_TYPE)
1678 /* Check for mismatched fastcall types */
1679 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1680 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1683 /* Check for mismatched return types (cdecl vs stdcall). */
1684 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1685 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1690 /* Return the regparm value for a function with the indicated TYPE and DECL.
1691 DECL may be NULL when calling function indirectly
1692 or considering a libcall. */
1695 ix86_function_regparm (tree type, tree decl)
/* Start from the -mregparm default; explicit attributes override it.  */
1698 int regparm = ix86_regparm;
1699 bool user_convention = false;
/* An explicit regparm(N) attribute supplies the value directly.  */
1703 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1706 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1707 user_convention = true;
/* fastcall also counts as a user-chosen convention.  */
1710 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1713 user_convention = true;
1716 /* Use register calling convention for local functions when possible. */
1717 if (!TARGET_64BIT && !user_convention && decl
1718 && flag_unit_at_a_time && !profile_flag)
1720 struct cgraph_local_info *i = cgraph_local_info (decl);
1723 /* We can't use regparm(3) for nested functions as these use
1724 static chain pointer in third argument. */
1725 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1735 /* Return true if EAX is live at the start of the function. Used by
1736 ix86_expand_prologue to determine if we need special help before
1737 calling allocate_stack_worker. */
1740 ix86_eax_live_at_start_p (void)
1742 /* Cheat. Don't bother working forward from ix86_function_regparm
1743 to the function type to whether an actual argument is located in
1744 eax. Instead just look at cfg info, which is still close enough
1745 to correct at this point. This gives false positives for broken
1746 functions that might use uninitialized data that happens to be
1747 allocated in eax, but who cares? */
/* Hard register number 0 is eax on i386.  */
1748 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1751 /* Value is the number of bytes of arguments automatically
1752 popped when returning from a subroutine call.
1753 FUNDECL is the declaration node of the function (as a tree),
1754 FUNTYPE is the data type of the function (as a tree),
1755 or for a library call it is an identifier node for the subroutine name.
1756 SIZE is the number of bytes of arguments passed on the stack.
1758 On the 80386, the RTD insn may be used to pop them if the number
1759 of args is fixed, but if the number is variable then the caller
1760 must pop them all. RTD can't be used for library calls now
1761 because the library is compiled with the Unix compiler.
1762 Use of RTD is a selectable option, since it is incompatible with
1763 standard Unix calling sequences. If the option is not selected,
1764 the caller must always pop the args.
1766 The attribute stdcall is equivalent to RTD on a per module basis. */
1769 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd applies only to real function declarations, never to
   libcalls (which are represented by IDENTIFIER_NODEs).  */
1771 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1773 /* Cdecl functions override -mrtd, and never pop the stack. */
1774 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1776 /* Stdcall and fastcall functions will pop the stack if not
1778 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1779 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Callee pops only when the argument list is fixed: either no
   prototype, or a prototype whose last entry is void_type_node
   (i.e. not varargs).  */
1783 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1784 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1785 == void_type_node)))
1789 /* Lose any fake structure return argument if it is passed on the stack. */
1790 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1793 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden struct-return pointer occupies one word.  */
1796 return GET_MODE_SIZE (Pmode);
1802 /* Argument support functions. */
1804 /* Return true when register may be used to pass function parameters. */
1806 ix86_function_arg_regno_p (int regno)
/* NOTE(review): this return is presumably the !TARGET_64BIT path;
   the guarding conditional is not visible in this extract -- confirm.  */
1810 return (regno < REGPARM_MAX
1811 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1812 if (SSE_REGNO_P (regno) && TARGET_SSE)
1814 /* RAX is used as hidden argument to va_arg functions. */
/* 64-bit integer argument registers are listed in
   x86_64_int_parameter_registers.  */
1817 for (i = 0; i < REGPARM_MAX; i++)
1818 if (regno == x86_64_int_parameter_registers[i])
1823 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1824 for a call to a function whose data type is FNTYPE.
1825 For a library call, FNTYPE is 0. */
1828 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1829 tree fntype, /* tree ptr for function decl */
1830 rtx libname, /* SYMBOL_REF of library name or 0 */
1833 static CUMULATIVE_ARGS zero_cum;
1834 tree param, next_param;
/* Optional -mdebug-arg tracing of the incoming arguments.  */
1836 if (TARGET_DEBUG_ARG)
1838 fprintf (stderr, "\ninit_cumulative_args (");
1840 fprintf (stderr, "fntype code = %s, ret code = %s",
1841 tree_code_name[(int) TREE_CODE (fntype)],
1842 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1844 fprintf (stderr, "no fntype");
1847 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1852 /* Set up the number of registers to use for passing arguments. */
1854 cum->nregs = ix86_function_regparm (fntype, fndecl);
1856 cum->nregs = ix86_regparm;
1857 cum->sse_nregs = SSE_REGPARM_MAX;
1858 cum->mmx_nregs = MMX_REGPARM_MAX;
1859 cum->warn_sse = true;
1860 cum->warn_mmx = true;
1861 cum->maybe_vaarg = false;
1863 /* Use ecx and edx registers if function has fastcall attribute */
1864 if (fntype && !TARGET_64BIT)
1866 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1874 /* Determine if this function has variable arguments. This is
1875 indicated by the last argument being 'void_type_node' if there
1876 are no variable arguments. If there are variable arguments, then
1877 we won't pass anything in registers */
1879 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1881 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1882 param != 0; param = next_param)
1884 next_param = TREE_CHAIN (param);
/* A last parameter that is not void_type_node means the prototype
   ends in "..." -- the function is varargs.  */
1885 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1896 cum->maybe_vaarg = true;
/* No prototype at all (and no libcall name) must be treated as
   possibly-varargs too.  */
1900 if ((!fntype && !libname)
1901 || (fntype && !TYPE_ARG_TYPES (fntype)))
1902 cum->maybe_vaarg = 1;
1904 if (TARGET_DEBUG_ARG)
1905 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1910 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1911 of this code is to classify each 8bytes of incoming argument by the register
1912 class and assign registers accordingly. */
1914 /* Return the union class of CLASS1 and CLASS2.
1915 See the x86-64 PS ABI for details. */
/* NOTE(review): elided listing — the bodies of rules #1 and #2 (the
   "return class1/class2" statements) are not visible here.  */
1917 static enum x86_64_reg_class
1918 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1920 /* Rule #1: If both classes are equal, this is the resulting class. */
1921 if (class1 == class2)
1924 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1926 if (class1 == X86_64_NO_CLASS)
1928 if (class2 == X86_64_NO_CLASS)
1931 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1932 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1933 return X86_64_MEMORY_CLASS;
1935 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Special case first: INTEGERSI merged with SSESF stays INTEGERSI (both
   halves fit in 32 bits).  */
1936 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1937 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1938 return X86_64_INTEGERSI_CLASS;
1939 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1940 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1941 return X86_64_INTEGER_CLASS;
1943 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1944 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1945 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1946 return X86_64_MEMORY_CLASS;
1948 /* Rule #6: Otherwise class SSE is used. */
1949 return X86_64_SSE_CLASS;
1952 /* Classify the argument of type TYPE and mode MODE.
1953 CLASSES will be filled by the register class used to pass each word
1954 of the operand. The number of words is returned. In case the parameter
1955 should be passed in memory, 0 is returned. As a special case for zero
1956 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1958 BIT_OFFSET is used internally for handling records and specifies offset
1959 of the offset in bits modulo 256 to avoid overflow cases.
1961 See the x86-64 PS ABI for details.
/* NOTE(review): heavily elided listing — many return statements, switch
   headers and brace lines are missing; do not infer control flow from
   adjacency of the visible lines alone.  */
1965 classify_argument (enum machine_mode mode, tree type,
1966 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1968 HOST_WIDE_INT bytes =
1969 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1970 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1972 /* Variable sized entities are always passed/returned in memory. */
1976 if (mode != VOIDmode
1977 && MUST_PASS_IN_STACK (mode, type))
/* ---- Aggregate (struct/union/array) classification ---- */
1980 if (type && AGGREGATE_TYPE_P (type))
1984 enum x86_64_reg_class subclasses[MAX_CLASSES];
1986 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1990 for (i = 0; i < words; i++)
1991 classes[i] = X86_64_NO_CLASS;
1993 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1994 signalize memory class, so handle it as special case. */
1997 classes[0] = X86_64_NO_CLASS;
2001 /* Classify each field of record and merge classes. */
2002 if (TREE_CODE (type) == RECORD_TYPE)
2004 /* For classes first merge in the field of the subclasses. */
2005 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2007 tree bases = TYPE_BINFO_BASETYPES (type);
2008 int n_bases = TREE_VEC_LENGTH (bases);
/* Recurse into each C++ base class, merging its per-8-byte classes
   into ours at the base's bit offset.  */
2011 for (i = 0; i < n_bases; ++i)
2013 tree binfo = TREE_VEC_ELT (bases, i);
2015 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2016 tree type = BINFO_TYPE (binfo);
2018 num = classify_argument (TYPE_MODE (type),
2020 (offset + bit_offset) % 256);
2023 for (i = 0; i < num; i++)
2025 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2027 merge_classes (subclasses[i], classes[i + pos]);
2031 /* And now merge the fields of structure. */
2032 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2034 if (TREE_CODE (field) == FIELD_DECL)
2038 /* Bitfields are always classified as integer. Handle them
2039 early, since later code would consider them to be
2040 misaligned integers. */
2041 if (DECL_BIT_FIELD (field))
2043 for (i = int_bit_position (field) / 8 / 8;
2044 i < (int_bit_position (field)
2045 + tree_low_cst (DECL_SIZE (field), 0)
2048 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield member: classify recursively and merge at its offset.  */
2053 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2054 TREE_TYPE (field), subclasses,
2055 (int_bit_position (field)
2056 + bit_offset) % 256);
2059 for (i = 0; i < num; i++)
2062 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2064 merge_classes (subclasses[i], classes[i + pos]);
2070 /* Arrays are handled as small records. */
2071 else if (TREE_CODE (type) == ARRAY_TYPE)
2074 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2075 TREE_TYPE (type), subclasses, bit_offset);
2079 /* The partial classes are now full classes. */
2080 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2081 subclasses[0] = X86_64_SSE_CLASS;
2082 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2083 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element's classification across all words.  */
2085 for (i = 0; i < words; i++)
2086 classes[i] = subclasses[i % num];
2088 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2089 else if (TREE_CODE (type) == UNION_TYPE
2090 || TREE_CODE (type) == QUAL_UNION_TYPE)
2092 /* For classes first merge in the field of the subclasses. */
2093 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2095 tree bases = TYPE_BINFO_BASETYPES (type);
2096 int n_bases = TREE_VEC_LENGTH (bases);
2099 for (i = 0; i < n_bases; ++i)
2101 tree binfo = TREE_VEC_ELT (bases, i);
2103 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2104 tree type = BINFO_TYPE (binfo);
2106 num = classify_argument (TYPE_MODE (type),
2108 (offset + (bit_offset % 64)) % 256);
2111 for (i = 0; i < num; i++)
2113 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2115 merge_classes (subclasses[i], classes[i + pos]);
/* Union members all start at offset 0, so merge without a position shift.  */
2119 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2121 if (TREE_CODE (field) == FIELD_DECL)
2124 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2125 TREE_TYPE (field), subclasses,
2129 for (i = 0; i < num; i++)
2130 classes[i] = merge_classes (subclasses[i], classes[i]);
/* SET_TYPE: classified by raw byte size (Pascal-style sets).  */
2134 else if (TREE_CODE (type) == SET_TYPE)
2138 classes[0] = X86_64_INTEGERSI_CLASS;
2141 else if (bytes <= 8)
2143 classes[0] = X86_64_INTEGER_CLASS;
2146 else if (bytes <= 12)
2148 classes[0] = X86_64_INTEGER_CLASS;
2149 classes[1] = X86_64_INTEGERSI_CLASS;
2154 classes[0] = X86_64_INTEGER_CLASS;
2155 classes[1] = X86_64_INTEGER_CLASS;
2162 /* Final merger cleanup. */
2163 for (i = 0; i < words; i++)
2165 /* If one class is MEMORY, everything should be passed in
2167 if (classes[i] == X86_64_MEMORY_CLASS)
2170 /* The X86_64_SSEUP_CLASS should be always preceded by
2171 X86_64_SSE_CLASS. */
2172 if (classes[i] == X86_64_SSEUP_CLASS
2173 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2174 classes[i] = X86_64_SSE_CLASS;
2176 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2177 if (classes[i] == X86_64_X87UP_CLASS
2178 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2179 classes[i] = X86_64_SSE_CLASS;
/* ---- Scalar (non-aggregate) classification ---- */
2184 /* Compute alignment needed. We align all types to natural boundaries with
2185 exception of XFmode that is aligned to 64bits. */
2186 if (mode != VOIDmode && mode != BLKmode)
2188 int mode_alignment = GET_MODE_BITSIZE (mode);
2191 mode_alignment = 128;
2192 else if (mode == XCmode)
2193 mode_alignment = 256;
2194 /* Misaligned fields are always returned in memory. */
2195 if (bit_offset % mode_alignment)
2199 /* Classification of atomic types. */
/* NOTE(review): the switch header and many case labels for the atomic-type
   classification are elided; the visible assignments below belong to
   different mode cases (integer, SF/DF, XF/XC, TF/TC, vector).  */
2209 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2210 classes[0] = X86_64_INTEGERSI_CLASS;
2212 classes[0] = X86_64_INTEGER_CLASS;
2216 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2219 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2220 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2223 if (!(bit_offset % 64))
2224 classes[0] = X86_64_SSESF_CLASS;
2226 classes[0] = X86_64_SSE_CLASS;
2229 classes[0] = X86_64_SSEDF_CLASS;
2232 classes[0] = X86_64_X87_CLASS;
2233 classes[1] = X86_64_X87UP_CLASS;
2239 classes[0] = X86_64_X87_CLASS;
2240 classes[1] = X86_64_X87UP_CLASS;
2241 classes[2] = X86_64_X87_CLASS;
2242 classes[3] = X86_64_X87UP_CLASS;
2245 classes[0] = X86_64_SSEDF_CLASS;
2246 classes[1] = X86_64_SSEDF_CLASS;
2249 classes[0] = X86_64_SSE_CLASS;
2257 classes[0] = X86_64_SSE_CLASS;
2258 classes[1] = X86_64_SSEUP_CLASS;
2273 /* Examine the argument and return set number of register required in each
2274 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): elided listing — the counter increments inside each case
   and the early-return for n == 0 are not visible here.  Presumably
   *int_nregs / *sse_nregs are accumulated per class; confirm against the
   full source.  */
2276 examine_argument (enum machine_mode mode, tree type, int in_return,
2277 int *int_nregs, int *sse_nregs)
2279 enum x86_64_reg_class class[MAX_CLASSES];
2280 int n = classify_argument (mode, type, class, 0);
/* Walk the per-8-byte classes backwards, tallying register needs.  */
2286 for (n--; n >= 0; n--)
2289 case X86_64_INTEGER_CLASS:
2290 case X86_64_INTEGERSI_CLASS:
2293 case X86_64_SSE_CLASS:
2294 case X86_64_SSESF_CLASS:
2295 case X86_64_SSEDF_CLASS:
2298 case X86_64_NO_CLASS:
2299 case X86_64_SSEUP_CLASS:
2301 case X86_64_X87_CLASS:
2302 case X86_64_X87UP_CLASS:
2306 case X86_64_MEMORY_CLASS:
2311 /* Construct container for the argument used by GCC interface. See
2312 FUNCTION_ARG for the detailed description. */
/* NOTE(review): elided listing — several switch headers, NULL returns and
   register-advance statements are missing between the visible lines.  */
2314 construct_container (enum machine_mode mode, tree type, int in_return,
2315 int nintregs, int nsseregs, const int * intreg,
2318 enum machine_mode tmpmode;
2320 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2321 enum x86_64_reg_class class[MAX_CLASSES];
2325 int needed_sseregs, needed_intregs;
2326 rtx exp[MAX_CLASSES];
2329 n = classify_argument (mode, type, class, 0);
/* Optional debug dump of the classification result.  */
2330 if (TARGET_DEBUG_ARG)
2333 fprintf (stderr, "Memory class\n");
2336 fprintf (stderr, "Classes:");
2337 for (i = 0; i < n; i++)
2339 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2341 fprintf (stderr, "\n");
/* Bail out (pass in memory) if classification failed or we would need
   more registers than remain available.  */
2346 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2348 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2351 /* First construct simple cases. Avoid SCmode, since we want to use
2352 single register to pass this type. */
2353 if (n == 1 && mode != SCmode)
2356 case X86_64_INTEGER_CLASS:
2357 case X86_64_INTEGERSI_CLASS:
2358 return gen_rtx_REG (mode, intreg[0]);
2359 case X86_64_SSE_CLASS:
2360 case X86_64_SSESF_CLASS:
2361 case X86_64_SSEDF_CLASS:
2362 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2363 case X86_64_X87_CLASS:
2364 return gen_rtx_REG (mode, FIRST_STACK_REG);
2365 case X86_64_NO_CLASS:
2366 /* Zero sized array, struct or class. */
/* Two-word special cases that fit a single hard register (pair).  */
2371 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2372 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2374 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2375 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2376 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2377 && class[1] == X86_64_INTEGER_CLASS
2378 && (mode == CDImode || mode == TImode || mode == TFmode)
2379 && intreg[0] + 1 == intreg[1])
2380 return gen_rtx_REG (mode, intreg[0]);
2382 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2383 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2384 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2386 /* Otherwise figure out the entries of the PARALLEL. */
2387 for (i = 0; i < n; i++)
2391 case X86_64_NO_CLASS:
2393 case X86_64_INTEGER_CLASS:
2394 case X86_64_INTEGERSI_CLASS:
2395 /* Merge TImodes on aligned occasions here too. */
2396 if (i * 8 + 8 > bytes)
2397 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2398 else if (class[i] == X86_64_INTEGERSI_CLASS)
2402 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2403 if (tmpmode == BLKmode)
2405 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2406 gen_rtx_REG (tmpmode, *intreg),
2410 case X86_64_SSESF_CLASS:
2411 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2412 gen_rtx_REG (SFmode,
2413 SSE_REGNO (sse_regno)),
2417 case X86_64_SSEDF_CLASS:
2418 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2419 gen_rtx_REG (DFmode,
2420 SSE_REGNO (sse_regno)),
2424 case X86_64_SSE_CLASS:
/* An SSE word followed by SSEUP occupies a full TImode register.  */
2425 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2429 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2430 gen_rtx_REG (tmpmode,
2431 SSE_REGNO (sse_regno)),
2433 if (tmpmode == TImode)
/* Package the collected EXPR_LIST entries into one PARALLEL rtx.  */
2441 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2442 for (i = 0; i < nexps; i++)
2443 XVECEXP (ret, 0, i) = exp [i];
2447 /* Update the data in CUM to advance over an argument
2448 of mode MODE and data type TYPE.
2449 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): elided listing — the TARGET_64BIT / 32-bit branch
   structure and several closing braces are not visible.  */
2452 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2453 enum machine_mode mode, /* current arg mode */
2454 tree type, /* type of the argument or 0 if lib support */
2455 int named) /* whether or not the argument was named */
2458 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2459 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2461 if (TARGET_DEBUG_ARG)
2463 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2464 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
/* 64-bit: consume int/SSE registers per the ABI classification, else
   fall back to stack words.  */
2467 int int_nregs, sse_nregs;
2468 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2469 cum->words += words;
2470 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2472 cum->nregs -= int_nregs;
2473 cum->sse_nregs -= sse_nregs;
2474 cum->regno += int_nregs;
2475 cum->sse_regno += sse_nregs;
2478 cum->words += words;
/* 32-bit: SSE vector args consume SSE registers ...  */
2482 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2483 && (!type || !AGGREGATE_TYPE_P (type)))
2485 cum->sse_words += words;
2486 cum->sse_nregs -= 1;
2487 cum->sse_regno += 1;
2488 if (cum->sse_nregs <= 0)
/* ... MMX vector args consume MMX registers ...  */
2494 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2495 && (!type || !AGGREGATE_TYPE_P (type)))
2497 cum->mmx_words += words;
2498 cum->mmx_nregs -= 1;
2499 cum->mmx_regno += 1;
2500 if (cum->mmx_nregs <= 0)
/* ... everything else consumes general (regparm) registers/words.  */
2508 cum->words += words;
2509 cum->nregs -= words;
2510 cum->regno += words;
2512 if (cum->nregs <= 0)
2522 /* Define where to put the arguments to a function.
2523 Value is zero to push the argument on the stack,
2524 or a hard register in which to store the argument.
2526 MODE is the argument's machine mode.
2527 TYPE is the data type of the argument (as a tree).
2528 This is null for libcalls where that information may
2530 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2531 the preceding args and about the function being called.
2532 NAMED is nonzero if this argument is a named parameter
2533 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided listing — the switch over modes and several
   return paths are missing between the visible lines.  */
2536 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2537 enum machine_mode mode, /* current arg mode */
2538 tree type, /* type of the argument or 0 if lib support */
2539 int named) /* != 0 for normal args, == 0 for ... args */
2543 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2544 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2545 static bool warnedsse, warnedmmx;
2547 /* Handle a hidden AL argument containing number of registers for varargs
2548 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2550 if (mode == VOIDmode)
2553 return GEN_INT (cum->maybe_vaarg
2554 ? (cum->sse_nregs < 0
/* 64-bit path: build the PARALLEL/REG container per ABI classification.  */
2562 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2563 &x86_64_int_parameter_registers [cum->regno],
2568 /* For now, pass fp/complex values on the stack. */
/* 32-bit integer-register (regparm/fastcall) case.  */
2580 if (words <= cum->nregs)
2582 int regno = cum->regno;
2584 /* Fastcall allocates the first two DWORD (SImode) or
2585 smaller arguments to ECX and EDX. */
2588 if (mode == BLKmode || mode == DImode)
2591 /* ECX not EAX is the first allocated register. */
2595 ret = gen_rtx_REG (mode, regno);
/* 32-bit SSE-vector case; warn once if SSE is not enabled.  */
2605 if (!type || !AGGREGATE_TYPE_P (type))
2607 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2610 warning ("SSE vector argument without SSE enabled "
2614 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
/* 32-bit MMX-vector case; warn once if MMX is not enabled.  */
2621 if (!type || !AGGREGATE_TYPE_P (type))
2623 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2626 warning ("MMX vector argument without MMX enabled "
2630 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2635 if (TARGET_DEBUG_ARG)
2638 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2639 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2642 print_simple_rtl (stderr, ret);
2644 fprintf (stderr, ", stack");
2646 fprintf (stderr, " )\n");
2652 /* A C expression that indicates when an argument must be passed by
2653 reference. If nonzero for an argument, a copy of that argument is
2654 made in memory and a pointer to the argument is passed instead of
2655 the argument itself. The pointer is passed in whatever way is
2656 appropriate for passing a pointer to that type. */
/* NOTE(review): elided listing — the return statements are not visible;
   only the variable-sized-type check remains.  */
2659 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2660 enum machine_mode mode ATTRIBUTE_UNUSED,
2661 tree type, int named ATTRIBUTE_UNUSED)
/* int_size_in_bytes returning -1 means variable-sized type.  */
2666 if (type && int_size_in_bytes (type) == -1)
2668 if (TARGET_DEBUG_ARG)
2669 fprintf (stderr, "function_arg_pass_by_reference\n");
2676 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): elided listing — several return statements and closing
   braces are not visible between the lines below.  */
2679 contains_128bit_aligned_vector_p (tree type)
2681 enum machine_mode mode = TYPE_MODE (type);
/* An SSE-mode scalar/vector with at-least-128-bit alignment qualifies
   directly.  */
2682 if (SSE_REG_MODE_P (mode)
2683 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2685 if (TYPE_ALIGN (type) < 128)
2688 if (AGGREGATE_TYPE_P (type))
2690 /* Walk the aggregates recursively. */
2691 if (TREE_CODE (type) == RECORD_TYPE
2692 || TREE_CODE (type) == UNION_TYPE
2693 || TREE_CODE (type) == QUAL_UNION_TYPE)
/* C++ base classes first ...  */
2697 if (TYPE_BINFO (type) != NULL
2698 && TYPE_BINFO_BASETYPES (type) != NULL)
2700 tree bases = TYPE_BINFO_BASETYPES (type);
2701 int n_bases = TREE_VEC_LENGTH (bases);
2704 for (i = 0; i < n_bases; ++i)
2706 tree binfo = TREE_VEC_ELT (bases, i);
2707 tree type = BINFO_TYPE (binfo);
2709 if (contains_128bit_aligned_vector_p (type))
2713 /* And now merge the fields of structure. */
2714 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2716 if (TREE_CODE (field) == FIELD_DECL
2717 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2721 /* Just for use if some languages passes arrays by value. */
2722 else if (TREE_CODE (type) == ARRAY_TYPE)
2724 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2733 /* Gives the alignment boundary, in bits, of an argument with the
2734 specified mode and type. */
/* NOTE(review): elided listing — the TARGET_64BIT branch and the final
   return are not visible here.  */
2737 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* Start from the type's (or mode's) natural alignment, floored at
   PARM_BOUNDARY.  */
2741 align = TYPE_ALIGN (type);
2743 align = GET_MODE_ALIGNMENT (mode);
2744 if (align < PARM_BOUNDARY)
2745 align = PARM_BOUNDARY;
2748 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2749 make an exception for SSE modes since these require 128bit
2752 The handling here differs from field_alignment. ICC aligns MMX
2753 arguments to 4 byte boundaries, while structure fields are aligned
2754 to 8 byte boundaries. */
2757 if (!SSE_REG_MODE_P (mode))
2758 align = PARM_BOUNDARY;
2762 if (!contains_128bit_aligned_vector_p (type))
2763 align = PARM_BOUNDARY;
2771 /* Return true when N is a possible register number of function value. */
/* NOTE(review): elided listing — presumably the two returns below are the
   TARGET_64BIT and 32-bit arms of an if/else; confirm against the full
   source.  */
2773 ix86_function_value_regno_p (int regno)
2777 return ((regno) == 0
2778 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2779 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2781 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2782 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2783 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2786 /* Define how to find the value returned by a function.
2787 VALTYPE is the data type of the value (as a tree).
2788 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2789 otherwise, FUNC is 0. */
/* NOTE(review): elided listing — the TARGET_64BIT guard and the return of
   `ret` are not visible between the lines below.  */
2791 ix86_function_value (tree valtype)
/* 64-bit: delegate to the ABI container builder over the return
   registers.  */
2795 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2796 REGPARM_MAX, SSE_REGPARM_MAX,
2797 x86_64_int_return_registers, 0);
2798 /* For zero sized structures, construct_container return NULL, but we need
2799 to keep rest of compiler happy by returning meaningful value. */
2801 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* 32-bit: pick the return register by mode.  */
2805 return gen_rtx_REG (TYPE_MODE (valtype),
2806 ix86_value_regno (TYPE_MODE (valtype)));
2809 /* Return false iff type is returned in memory. */
/* NOTE(review): the head comment's "Return false iff" reads inverted for a
   predicate of this name; the elided return statements would settle the
   polarity — verify against the full source before relying on it.  */
2811 ix86_return_in_memory (tree type)
2813 int needed_intregs, needed_sseregs, size;
2814 enum machine_mode mode = TYPE_MODE (type);
/* 64-bit: in memory exactly when the ABI classification fails.  */
2817 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2819 if (mode == BLKmode)
2822 size = int_size_in_bytes (type);
2824 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2827 if (VECTOR_MODE_P (mode) || mode == TImode)
2829 /* User-created vectors small enough to fit in EAX. */
2833 /* MMX/3dNow values are returned on the stack, since we've
2834 got to EMMS/FEMMS before returning. */
2838 /* SSE values are returned in XMM0. */
2839 /* ??? Except when it doesn't exist? We have a choice of
2840 either (1) being abi incompatible with a -march switch,
2841 or (2) generating an error here. Given no good solution,
2842 I think the safest thing is one warning. The user won't
2843 be able to use -Werror, but.... */
2854 warning ("SSE vector return without SSE enabled "
2869 /* Define how to find the value returned by a library function
2870 assuming the value has mode MODE. */
/* NOTE(review): elided listing — the TARGET_64BIT switch over modes that
   selects between these returns is not visible.  */
2872 ix86_libcall_value (enum machine_mode mode)
2882 return gen_rtx_REG (mode, FIRST_SSE_REG);
2885 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2890 return gen_rtx_REG (mode, 0);
/* 32-bit fallback: shared mode-to-register mapping.  */
2894 return gen_rtx_REG (mode, ix86_value_regno (mode));
2897 /* Given a mode, return the register to use for a return value. */
/* NOTE(review): elided listing — the function header/opening brace and the
   final `return 0;` are not visible.  */
2900 ix86_value_regno (enum machine_mode mode)
2902 /* Floating point return values in %st(0). */
2903 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2904 return FIRST_FLOAT_REG;
2905 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2906 we prevent this case when sse is not available. */
2907 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2908 return FIRST_SSE_REG;
2909 /* Everything else in %eax. */
2913 /* Create the va_list data type. */
/* NOTE(review): elided listing — the TARGET_64BIT guard before the 32-bit
   early return and the types of f_ovf/f_sav are not fully visible.  */
2916 ix86_build_builtin_va_list (void)
2918 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2920 /* For i386 we use plain pointer to argument area. */
2922 return build_pointer_type (char_type_node);
/* 64-bit: build the four-field __va_list_tag record
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area).  */
2924 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2925 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2927 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2928 unsigned_type_node);
2929 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2930 unsigned_type_node);
2931 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2933 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2936 DECL_FIELD_CONTEXT (f_gpr) = record;
2937 DECL_FIELD_CONTEXT (f_fpr) = record;
2938 DECL_FIELD_CONTEXT (f_ovf) = record;
2939 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields onto the record and lay it out.  */
2941 TREE_CHAIN (record) = type_decl;
2942 TYPE_NAME (record) = type_decl;
2943 TYPE_FIELDS (record) = f_gpr;
2944 TREE_CHAIN (f_gpr) = f_fpr;
2945 TREE_CHAIN (f_fpr) = f_ovf;
2946 TREE_CHAIN (f_ovf) = f_sav;
2948 layout_type (record);
2950 /* The correct type is an array type of one element. */
2951 return build_array_type (record, build_index_type (size_zero_node));
2954 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): elided listing — early-exit guards (e.g. for 32-bit
   targets / no_rtl) and some local declarations are not visible.  */
2957 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2958 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2961 CUMULATIVE_ARGS next_cum;
2962 rtx save_area = NULL_RTX, mem;
2975 /* Indicate to allocate space on the stack for varargs save area. */
2976 ix86_save_varrargs_registers = 1;
2978 cfun->stack_alignment_needed = 128;
/* A function is stdarg when its last declared parameter is not void.  */
2980 fntype = TREE_TYPE (current_function_decl);
2981 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2982 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2983 != void_type_node));
2985 /* For varargs, we do not want to skip the dummy va_dcl argument.
2986 For stdargs, we do want to skip the last named argument. */
2989 function_arg_advance (&next_cum, mode, type, 1);
2992 save_area = frame_pointer_rtx;
2994 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer parameter registers into the
   register-save area.  */
2996 for (i = next_cum.regno; i < ix86_regparm; i++)
2998 mem = gen_rtx_MEM (Pmode,
2999 plus_constant (save_area, i * UNITS_PER_WORD));
3000 set_mem_alias_set (mem, set);
3001 emit_move_insn (mem, gen_rtx_REG (Pmode,
3002 x86_64_int_parameter_registers[i]));
3005 if (next_cum.sse_nregs)
3007 /* Now emit code to save SSE registers. The AX parameter contains number
3008 of SSE parameter registers used to call this function. We use
3009 sse_prologue_save insn template that produces computed jump across
3010 SSE saves. We need some preparation work to get this working. */
3012 label = gen_label_rtx ();
3013 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3015 /* Compute address to jump to :
3016 label - 5*eax + nnamed_sse_arguments*5 */
3017 tmp_reg = gen_reg_rtx (Pmode);
3018 nsse_reg = gen_reg_rtx (Pmode);
3019 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3020 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3021 gen_rtx_MULT (Pmode, nsse_reg,
3023 if (next_cum.sse_regno)
3026 gen_rtx_CONST (DImode,
3027 gen_rtx_PLUS (DImode,
3029 GEN_INT (next_cum.sse_regno * 4))));
3031 emit_move_insn (nsse_reg, label_ref);
3032 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3034 /* Compute address of memory block we save into. We always use pointer
3035 pointing 127 bytes after first byte to store - this is needed to keep
3036 instruction size limited by 4 bytes. */
3037 tmp_reg = gen_reg_rtx (Pmode);
3038 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3039 plus_constant (save_area,
3040 8 * REGPARM_MAX + 127)));
3041 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3042 set_mem_alias_set (mem, set);
3043 set_mem_align (mem, BITS_PER_WORD);
3045 /* And finally do the dirty job! */
3046 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3047 GEN_INT (next_cum.sse_regno), label));
3052 /* Implement va_start. */
/* NOTE(review): elided listing — the TARGET_64BIT check surrounding the
   std_expand_builtin_va_start fallback and the early return are not
   fully visible.  */
3055 ix86_va_start (tree valist, rtx nextarg)
3057 HOST_WIDE_INT words, n_gpr, n_fpr;
3058 tree f_gpr, f_fpr, f_ovf, f_sav;
3059 tree gpr, fpr, ovf, sav, t;
3061 /* Only 64bit target needs something special. */
3064 std_expand_builtin_va_start (valist, nextarg);
/* Fetch the four __va_list_tag fields in declaration order.  */
3068 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3069 f_fpr = TREE_CHAIN (f_gpr);
3070 f_ovf = TREE_CHAIN (f_fpr);
3071 f_sav = TREE_CHAIN (f_ovf);
3073 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3074 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3075 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3076 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3077 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3079 /* Count number of gp and fp argument registers used. */
3080 words = current_function_args_info.words;
3081 n_gpr = current_function_args_info.regno;
3082 n_fpr = current_function_args_info.sse_regno;
3084 if (TARGET_DEBUG_ARG)
3085 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3086 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = bytes of integer registers already consumed.  */
3088 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3089 build_int_2 (n_gpr * 8, 0));
3090 TREE_SIDE_EFFECTS (t) = 1;
3091 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts after the 8*REGPARM_MAX integer save slots.  */
3093 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3094 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3095 TREE_SIDE_EFFECTS (t) = 1;
3096 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3098 /* Find the overflow area. */
3099 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3101 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3102 build_int_2 (words * UNITS_PER_WORD, 0));
3103 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3104 TREE_SIDE_EFFECTS (t) = 1;
3105 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3107 /* Find the register save area.
3108 Prologue of the function save it right above stack frame. */
3109 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3110 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3111 TREE_SIDE_EFFECTS (t) = 1;
3112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3115 /* Implement va_arg. */
3117 ix86_va_arg (tree valist, tree type)
3119 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3120 tree f_gpr, f_fpr, f_ovf, f_sav;
3121 tree gpr, fpr, ovf, sav, t;
3123 rtx lab_false, lab_over = NULL_RTX;
3128 /* Only 64bit target needs something special. */
3131 return std_expand_builtin_va_arg (valist, type);
3134 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3135 f_fpr = TREE_CHAIN (f_gpr);
3136 f_ovf = TREE_CHAIN (f_fpr);
3137 f_sav = TREE_CHAIN (f_ovf);
3139 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3140 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3141 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3142 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3143 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3145 size = int_size_in_bytes (type);
3148 /* Passed by reference. */
3150 type = build_pointer_type (type);
3151 size = int_size_in_bytes (type);
3153 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3155 container = construct_container (TYPE_MODE (type), type, 0,
3156 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3158 * Pull the value out of the saved registers ...
3161 addr_rtx = gen_reg_rtx (Pmode);
3165 rtx int_addr_rtx, sse_addr_rtx;
3166 int needed_intregs, needed_sseregs;
3169 lab_over = gen_label_rtx ();
3170 lab_false = gen_label_rtx ();
3172 examine_argument (TYPE_MODE (type), type, 0,
3173 &needed_intregs, &needed_sseregs);
3176 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3177 || TYPE_ALIGN (type) > 128);
3179 /* In case we are passing structure, verify that it is consecutive block
3180 on the register save area. If not we need to do moves. */
3181 if (!need_temp && !REG_P (container))
3183 /* Verify that all registers are strictly consecutive */
3184 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3188 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3190 rtx slot = XVECEXP (container, 0, i);
3191 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3192 || INTVAL (XEXP (slot, 1)) != i * 16)
3200 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3202 rtx slot = XVECEXP (container, 0, i);
3203 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3204 || INTVAL (XEXP (slot, 1)) != i * 8)
3211 int_addr_rtx = addr_rtx;
3212 sse_addr_rtx = addr_rtx;
3216 int_addr_rtx = gen_reg_rtx (Pmode);
3217 sse_addr_rtx = gen_reg_rtx (Pmode);
3219 /* First ensure that we fit completely in registers. */
3222 emit_cmp_and_jump_insns (expand_expr
3223 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3224 GEN_INT ((REGPARM_MAX - needed_intregs +
3225 1) * 8), GE, const1_rtx, SImode,
3230 emit_cmp_and_jump_insns (expand_expr
3231 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3232 GEN_INT ((SSE_REGPARM_MAX -
3233 needed_sseregs + 1) * 16 +
3234 REGPARM_MAX * 8), GE, const1_rtx,
3235 SImode, 1, lab_false);
3238 /* Compute index to start of area used for integer regs. */
3241 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3242 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3243 if (r != int_addr_rtx)
3244 emit_move_insn (int_addr_rtx, r);
3248 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3249 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3250 if (r != sse_addr_rtx)
3251 emit_move_insn (sse_addr_rtx, r);
3259 /* Never use the memory itself, as it has the alias set. */
3260 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3261 mem = gen_rtx_MEM (BLKmode, x);
3262 force_operand (x, addr_rtx);
3263 set_mem_alias_set (mem, get_varargs_alias_set ());
3264 set_mem_align (mem, BITS_PER_UNIT);
3266 for (i = 0; i < XVECLEN (container, 0); i++)
3268 rtx slot = XVECEXP (container, 0, i);
3269 rtx reg = XEXP (slot, 0);
3270 enum machine_mode mode = GET_MODE (reg);
3276 if (SSE_REGNO_P (REGNO (reg)))
3278 src_addr = sse_addr_rtx;
3279 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3283 src_addr = int_addr_rtx;
3284 src_offset = REGNO (reg) * 8;
3286 src_mem = gen_rtx_MEM (mode, src_addr);
3287 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3288 src_mem = adjust_address (src_mem, mode, src_offset);
3289 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3290 emit_move_insn (dest_mem, src_mem);
3297 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3298 build_int_2 (needed_intregs * 8, 0));
3299 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3300 TREE_SIDE_EFFECTS (t) = 1;
3301 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3306 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3307 build_int_2 (needed_sseregs * 16, 0));
3308 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3309 TREE_SIDE_EFFECTS (t) = 1;
3310 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3313 emit_jump_insn (gen_jump (lab_over));
3315 emit_label (lab_false);
3318 /* ... otherwise out of the overflow area. */
3320 /* Care for on-stack alignment if needed. */
3321 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3325 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3326 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3327 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3331 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3333 emit_move_insn (addr_rtx, r);
3336 build (PLUS_EXPR, TREE_TYPE (t), t,
3337 build_int_2 (rsize * UNITS_PER_WORD, 0));
3338 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3339 TREE_SIDE_EFFECTS (t) = 1;
3340 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3343 emit_label (lab_over);
3347 r = gen_rtx_MEM (Pmode, addr_rtx);
3348 set_mem_alias_set (r, get_varargs_alias_set ());
3349 emit_move_insn (addr_rtx, r);
3355 /* Return nonzero if OP is either a i387 or SSE fp register. */
3357 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3359 return ANY_FP_REG_P (op);
3362 /* Return nonzero if OP is an i387 fp register. */
3364 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3366 return FP_REG_P (op);
3369 /* Return nonzero if OP is a non-fp register_operand. */
3371 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3373 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3376 /* Return nonzero if OP is a register operand other than an
3377 i387 fp register. */
3379 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3381 return register_operand (op, mode) && !FP_REG_P (op);
3384 /* Return nonzero if OP is general operand representable on x86_64. */
3387 x86_64_general_operand (rtx op, enum machine_mode mode)
3390 return general_operand (op, mode);
3391 if (nonimmediate_operand (op, mode))
3393 return x86_64_sign_extended_value (op);
3396 /* Return nonzero if OP is general operand representable on x86_64
3397 as either sign extended or zero extended constant. */
3400 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3403 return general_operand (op, mode);
3404 if (nonimmediate_operand (op, mode))
3406 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3409 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3412 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3415 return nonmemory_operand (op, mode);
3416 if (register_operand (op, mode))
3418 return x86_64_sign_extended_value (op);
3421 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3424 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3426 if (!TARGET_64BIT || !flag_pic)
3427 return nonmemory_operand (op, mode);
3428 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3430 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3435 /* Return nonzero if OPNUM's MEM should be matched
3436 in movabs* patterns. */
3439 ix86_check_movabs (rtx insn, int opnum)
3443 set = PATTERN (insn);
3444 if (GET_CODE (set) == PARALLEL)
3445 set = XVECEXP (set, 0, 0);
3446 if (GET_CODE (set) != SET)
3448 mem = XEXP (set, opnum);
3449 while (GET_CODE (mem) == SUBREG)
3450 mem = SUBREG_REG (mem);
3451 if (GET_CODE (mem) != MEM)
3453 return (volatile_ok || !MEM_VOLATILE_P (mem));
3456 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3459 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3462 return nonmemory_operand (op, mode);
3463 if (register_operand (op, mode))
3465 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3468 /* Return nonzero if OP is immediate operand representable on x86_64. */
3471 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3474 return immediate_operand (op, mode);
3475 return x86_64_sign_extended_value (op);
3478 /* Return nonzero if OP is immediate operand representable on x86_64. */
3481 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3483 return x86_64_zero_extended_value (op);
3486 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3487 for shift & compare patterns, as shifting by 0 does not change flags),
3488 else return zero. */
3491 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3493 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3496 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3497 reference and a constant. */
3500 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3502 switch (GET_CODE (op))
3510 if (GET_CODE (op) == SYMBOL_REF
3511 || GET_CODE (op) == LABEL_REF
3512 || (GET_CODE (op) == UNSPEC
3513 && (XINT (op, 1) == UNSPEC_GOT
3514 || XINT (op, 1) == UNSPEC_GOTOFF
3515 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3517 if (GET_CODE (op) != PLUS
3518 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3522 if (GET_CODE (op) == SYMBOL_REF
3523 || GET_CODE (op) == LABEL_REF)
3525 /* Only @GOTOFF gets offsets. */
3526 if (GET_CODE (op) != UNSPEC
3527 || XINT (op, 1) != UNSPEC_GOTOFF)
3530 op = XVECEXP (op, 0, 0);
3531 if (GET_CODE (op) == SYMBOL_REF
3532 || GET_CODE (op) == LABEL_REF)
3541 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3544 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3546 if (GET_CODE (op) != CONST)
3551 if (GET_CODE (op) == UNSPEC
3552 && XINT (op, 1) == UNSPEC_GOTPCREL)
3554 if (GET_CODE (op) == PLUS
3555 && GET_CODE (XEXP (op, 0)) == UNSPEC
3556 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3561 if (GET_CODE (op) == UNSPEC)
3563 if (GET_CODE (op) != PLUS
3564 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3567 if (GET_CODE (op) == UNSPEC)
3573 /* Return true if OP is a symbolic operand that resolves locally. */
3576 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3578 if (GET_CODE (op) == CONST
3579 && GET_CODE (XEXP (op, 0)) == PLUS
3580 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3581 op = XEXP (XEXP (op, 0), 0);
3583 if (GET_CODE (op) == LABEL_REF)
3586 if (GET_CODE (op) != SYMBOL_REF)
3589 if (SYMBOL_REF_LOCAL_P (op))
3592 /* There is, however, a not insubstantial body of code in the rest of
3593 the compiler that assumes it can just stick the results of
3594 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3595 /* ??? This is a hack. Should update the body of the compiler to
3596 always create a DECL an invoke targetm.encode_section_info. */
3597 if (strncmp (XSTR (op, 0), internal_label_prefix,
3598 internal_label_prefix_len) == 0)
3604 /* Test for various thread-local symbols. */
3607 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3609 if (GET_CODE (op) != SYMBOL_REF)
3611 return SYMBOL_REF_TLS_MODEL (op);
3615 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3617 if (GET_CODE (op) != SYMBOL_REF)
3619 return SYMBOL_REF_TLS_MODEL (op) == kind;
3623 global_dynamic_symbolic_operand (rtx op,
3624 enum machine_mode mode ATTRIBUTE_UNUSED)
3626 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3630 local_dynamic_symbolic_operand (rtx op,
3631 enum machine_mode mode ATTRIBUTE_UNUSED)
3633 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3637 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3639 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3643 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3645 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3648 /* Test for a valid operand for a call instruction. Don't allow the
3649 arg pointer register or virtual regs since they may decay into
3650 reg + const, which the patterns can't handle. */
3653 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3655 /* Disallow indirect through a virtual register. This leads to
3656 compiler aborts when trying to eliminate them. */
3657 if (GET_CODE (op) == REG
3658 && (op == arg_pointer_rtx
3659 || op == frame_pointer_rtx
3660 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3661 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3664 /* Disallow `call 1234'. Due to varying assembler lameness this
3665 gets either rejected or translated to `call .+1234'. */
3666 if (GET_CODE (op) == CONST_INT)
3669 /* Explicitly allow SYMBOL_REF even if pic. */
3670 if (GET_CODE (op) == SYMBOL_REF)
3673 /* Otherwise we can allow any general_operand in the address. */
3674 return general_operand (op, Pmode);
3677 /* Test for a valid operand for a call instruction. Don't allow the
3678 arg pointer register or virtual regs since they may decay into
3679 reg + const, which the patterns can't handle. */
3682 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3684 /* Disallow indirect through a virtual register. This leads to
3685 compiler aborts when trying to eliminate them. */
3686 if (GET_CODE (op) == REG
3687 && (op == arg_pointer_rtx
3688 || op == frame_pointer_rtx
3689 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3690 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3693 /* Explicitly allow SYMBOL_REF even if pic. */
3694 if (GET_CODE (op) == SYMBOL_REF)
3697 /* Otherwise we can only allow register operands. */
3698 return register_operand (op, Pmode);
3702 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3704 if (GET_CODE (op) == CONST
3705 && GET_CODE (XEXP (op, 0)) == PLUS
3706 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3707 op = XEXP (XEXP (op, 0), 0);
3708 return GET_CODE (op) == SYMBOL_REF;
3711 /* Match exactly zero and one. */
3714 const0_operand (rtx op, enum machine_mode mode)
3716 return op == CONST0_RTX (mode);
3720 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3722 return op == const1_rtx;
3725 /* Match 2, 4, or 8. Used for leal multiplicands. */
3728 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3730 return (GET_CODE (op) == CONST_INT
3731 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3735 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3737 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3741 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3743 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3747 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3749 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3753 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3755 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3759 /* True if this is a constant appropriate for an increment or decrement. */
3762 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3764 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3765 registers, since carry flag is not set. */
3766 if (TARGET_PENTIUM4 && !optimize_size)
3768 return op == const1_rtx || op == constm1_rtx;
3771 /* Return nonzero if OP is acceptable as operand of DImode shift
3775 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3778 return nonimmediate_operand (op, mode);
3780 return register_operand (op, mode);
3783 /* Return false if this is the stack pointer, or any other fake
3784 register eliminable to the stack pointer. Otherwise, this is
3787 This is used to prevent esp from being used as an index reg.
3788 Which would only happen in pathological cases. */
3791 reg_no_sp_operand (rtx op, enum machine_mode mode)
3794 if (GET_CODE (t) == SUBREG)
3796 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3799 return register_operand (op, mode);
3803 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3805 return MMX_REG_P (op);
3808 /* Return false if this is any eliminable register. Otherwise
3812 general_no_elim_operand (rtx op, enum machine_mode mode)
3815 if (GET_CODE (t) == SUBREG)
3817 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3818 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3819 || t == virtual_stack_dynamic_rtx)
3822 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3823 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3826 return general_operand (op, mode);
3829 /* Return false if this is any eliminable register. Otherwise
3830 register_operand or const_int. */
3833 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3836 if (GET_CODE (t) == SUBREG)
3838 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3839 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3840 || t == virtual_stack_dynamic_rtx)
3843 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3846 /* Return false if this is any eliminable register or stack register,
3847 otherwise work like register_operand. */
3850 index_register_operand (rtx op, enum machine_mode mode)
3853 if (GET_CODE (t) == SUBREG)
3857 if (t == arg_pointer_rtx
3858 || t == frame_pointer_rtx
3859 || t == virtual_incoming_args_rtx
3860 || t == virtual_stack_vars_rtx
3861 || t == virtual_stack_dynamic_rtx
3862 || REGNO (t) == STACK_POINTER_REGNUM)
3865 return general_operand (op, mode);
3868 /* Return true if op is a Q_REGS class register. */
3871 q_regs_operand (rtx op, enum machine_mode mode)
3873 if (mode != VOIDmode && GET_MODE (op) != mode)
3875 if (GET_CODE (op) == SUBREG)
3876 op = SUBREG_REG (op);
3877 return ANY_QI_REG_P (op);
3880 /* Return true if op is an flags register. */
3883 flags_reg_operand (rtx op, enum machine_mode mode)
3885 if (mode != VOIDmode && GET_MODE (op) != mode)
3887 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3890 /* Return true if op is a NON_Q_REGS class register. */
3893 non_q_regs_operand (rtx op, enum machine_mode mode)
3895 if (mode != VOIDmode && GET_MODE (op) != mode)
3897 if (GET_CODE (op) == SUBREG)
3898 op = SUBREG_REG (op);
3899 return NON_QI_REG_P (op);
3903 zero_extended_scalar_load_operand (rtx op,
3904 enum machine_mode mode ATTRIBUTE_UNUSED)
3907 if (GET_CODE (op) != MEM)
3909 op = maybe_get_pool_constant (op);
3912 if (GET_CODE (op) != CONST_VECTOR)
3915 (GET_MODE_SIZE (GET_MODE (op)) /
3916 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3917 for (n_elts--; n_elts > 0; n_elts--)
3919 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3920 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3926 /* Return 1 when OP is operand acceptable for standard SSE move. */
3928 vector_move_operand (rtx op, enum machine_mode mode)
3930 if (nonimmediate_operand (op, mode))
3932 if (GET_MODE (op) != mode && mode != VOIDmode)
3934 return (op == CONST0_RTX (GET_MODE (op)));
3937 /* Return true if op if a valid address, and does not contain
3938 a segment override. */
3941 no_seg_address_operand (rtx op, enum machine_mode mode)
3943 struct ix86_address parts;
3945 if (! address_operand (op, mode))
3948 if (! ix86_decompose_address (op, &parts))
3951 return parts.seg == SEG_DEFAULT;
3954 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3957 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3959 enum rtx_code code = GET_CODE (op);
3962 /* Operations supported directly. */
3972 /* These are equivalent to ones above in non-IEEE comparisons. */
3979 return !TARGET_IEEE_FP;
3984 /* Return 1 if OP is a valid comparison operator in valid mode. */
3986 ix86_comparison_operator (rtx op, enum machine_mode mode)
3988 enum machine_mode inmode;
3989 enum rtx_code code = GET_CODE (op);
3990 if (mode != VOIDmode && GET_MODE (op) != mode)
3992 if (GET_RTX_CLASS (code) != '<')
3994 inmode = GET_MODE (XEXP (op, 0));
3996 if (inmode == CCFPmode || inmode == CCFPUmode)
3998 enum rtx_code second_code, bypass_code;
3999 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4000 return (bypass_code == NIL && second_code == NIL);
4007 if (inmode == CCmode || inmode == CCGCmode
4008 || inmode == CCGOCmode || inmode == CCNOmode)
4011 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4012 if (inmode == CCmode)
4016 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4024 /* Return 1 if OP is a valid comparison operator testing carry flag
4027 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4029 enum machine_mode inmode;
4030 enum rtx_code code = GET_CODE (op);
4032 if (mode != VOIDmode && GET_MODE (op) != mode)
4034 if (GET_RTX_CLASS (code) != '<')
4036 inmode = GET_MODE (XEXP (op, 0));
4037 if (GET_CODE (XEXP (op, 0)) != REG
4038 || REGNO (XEXP (op, 0)) != 17
4039 || XEXP (op, 1) != const0_rtx)
4042 if (inmode == CCFPmode || inmode == CCFPUmode)
4044 enum rtx_code second_code, bypass_code;
4046 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4047 if (bypass_code != NIL || second_code != NIL)
4049 code = ix86_fp_compare_code_to_integer (code);
4051 else if (inmode != CCmode)
4056 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4059 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4061 enum machine_mode inmode;
4062 enum rtx_code code = GET_CODE (op);
4064 if (mode != VOIDmode && GET_MODE (op) != mode)
4066 if (GET_RTX_CLASS (code) != '<')
4068 inmode = GET_MODE (XEXP (op, 0));
4069 if (inmode == CCFPmode || inmode == CCFPUmode)
4071 enum rtx_code second_code, bypass_code;
4073 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4074 if (bypass_code != NIL || second_code != NIL)
4076 code = ix86_fp_compare_code_to_integer (code);
4078 /* i387 supports just limited amount of conditional codes. */
4081 case LTU: case GTU: case LEU: case GEU:
4082 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4085 case ORDERED: case UNORDERED:
4093 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4096 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4098 switch (GET_CODE (op))
4101 /* Modern CPUs have same latency for HImode and SImode multiply,
4102 but 386 and 486 do HImode multiply faster. */
4103 return ix86_tune > PROCESSOR_I486;
4115 /* Nearly general operand, but accept any const_double, since we wish
4116 to be able to drop them into memory rather than have them get pulled
4120 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4122 if (mode != VOIDmode && mode != GET_MODE (op))
4124 if (GET_CODE (op) == CONST_DOUBLE)
4126 return general_operand (op, mode);
4129 /* Match an SI or HImode register for a zero_extract. */
4132 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4135 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4136 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4139 if (!register_operand (op, VOIDmode))
4142 /* Be careful to accept only registers having upper parts. */
4143 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4144 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4147 /* Return 1 if this is a valid binary floating-point operation.
4148 OP is the expression matched, and MODE is its mode. */
4151 binary_fp_operator (rtx op, enum machine_mode mode)
4153 if (mode != VOIDmode && mode != GET_MODE (op))
4156 switch (GET_CODE (op))
4162 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4170 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4172 return GET_CODE (op) == MULT;
4176 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4178 return GET_CODE (op) == DIV;
4182 arith_or_logical_operator (rtx op, enum machine_mode mode)
4184 return ((mode == VOIDmode || GET_MODE (op) == mode)
4185 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4186 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4189 /* Returns 1 if OP is memory operand with a displacement. */
4192 memory_displacement_operand (rtx op, enum machine_mode mode)
4194 struct ix86_address parts;
4196 if (! memory_operand (op, mode))
4199 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4202 return parts.disp != NULL_RTX;
4205 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4206 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4208 ??? It seems likely that this will only work because cmpsi is an
4209 expander, and no actual insns use this. */
4212 cmpsi_operand (rtx op, enum machine_mode mode)
4214 if (nonimmediate_operand (op, mode))
4217 if (GET_CODE (op) == AND
4218 && GET_MODE (op) == SImode
4219 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4220 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4221 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4222 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4223 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4224 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4230 /* Returns 1 if OP is memory operand that can not be represented by the
4234 long_memory_operand (rtx op, enum machine_mode mode)
4236 if (! memory_operand (op, mode))
4239 return memory_address_length (op) != 0;
4242 /* Return nonzero if the rtx is known aligned. */
4245 aligned_operand (rtx op, enum machine_mode mode)
4247 struct ix86_address parts;
4249 if (!general_operand (op, mode))
4252 /* Registers and immediate operands are always "aligned". */
4253 if (GET_CODE (op) != MEM)
4256 /* Don't even try to do any aligned optimizations with volatiles. */
4257 if (MEM_VOLATILE_P (op))
4262 /* Pushes and pops are only valid on the stack pointer. */
4263 if (GET_CODE (op) == PRE_DEC
4264 || GET_CODE (op) == POST_INC)
4267 /* Decode the address. */
4268 if (! ix86_decompose_address (op, &parts))
4271 /* Look for some component that isn't known to be aligned. */
4275 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4280 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4285 if (GET_CODE (parts.disp) != CONST_INT
4286 || (INTVAL (parts.disp) & 3) != 0)
4290 /* Didn't find one -- this must be an aligned address. */
4294 /* Initialize the table of extra 80387 mathematical constants. */
4297 init_ext_80387_constants (void)
4299 static const char * cst[5] =
4301 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4302 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4303 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4304 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4305 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4309 for (i = 0; i < 5; i++)
4311 real_from_string (&ext_80387_constants_table[i], cst[i]);
4312 /* Ensure each constant is rounded to XFmode precision. */
4313 real_convert (&ext_80387_constants_table[i],
4314 XFmode, &ext_80387_constants_table[i]);
4317 ext_80387_constants_init = 1;
4320 /* Return true if the constant is something that can be loaded with
4321 a special instruction. */
4324 standard_80387_constant_p (rtx x)
4326 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4329 if (x == CONST0_RTX (GET_MODE (x)))
4331 if (x == CONST1_RTX (GET_MODE (x)))
4334 /* For XFmode constants, try to find a special 80387 instruction on
4335 those CPUs that benefit from them. */
4336 if (GET_MODE (x) == XFmode
4337 && x86_ext_80387_constants & TUNEMASK)
4342 if (! ext_80387_constants_init)
4343 init_ext_80387_constants ();
4345 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4346 for (i = 0; i < 5; i++)
4347 if (real_identical (&r, &ext_80387_constants_table[i]))
4354 /* Return the opcode of the special instruction to be used to load
4358 standard_80387_constant_opcode (rtx x)
4360 switch (standard_80387_constant_p (x))
4380 /* Return the CONST_DOUBLE representing the 80387 constant that is
4381 loaded by the specified special instruction. The argument IDX
4382 matches the return value from standard_80387_constant_p. */
4385 standard_80387_constant_rtx (int idx)
4389 if (! ext_80387_constants_init)
4390 init_ext_80387_constants ();
4406 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4410 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4413 standard_sse_constant_p (rtx x)
4415 if (x == const0_rtx)
4417 return (x == CONST0_RTX (GET_MODE (x)));
4420 /* Returns 1 if OP contains a symbol reference */
4423 symbolic_reference_mentioned_p (rtx op)
4428 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4431 fmt = GET_RTX_FORMAT (GET_CODE (op));
4432 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4438 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4439 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4443 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4450 /* Return 1 if it is appropriate to emit `ret' instructions in the
4451 body of a function. Do this only if the epilogue is simple, needing a
4452 couple of insns. Prior to reloading, we can't tell how many registers
4453 must be saved, so return 0 then. Return 0 if there is no frame
4454 marker to de-allocate.
4456 If NON_SAVING_SETJMP is defined and true, then it is not possible
4457 for the epilogue to be simple, so return 0. This is a special case
4458 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4459 until final, but jump_optimize may need to know sooner if a
4463 ix86_can_use_return_insn_p (void)
4465 struct ix86_frame frame;
4467 #ifdef NON_SAVING_SETJMP
4468 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4472 if (! reload_completed || frame_pointer_needed)
4475 /* Don't allow more than 32 pop, since that's all we can do
4476 with one instruction. */
4477 if (current_function_pops_args
4478 && current_function_args_size >= 32768)
4481 ix86_compute_frame_layout (&frame);
4482 return frame.to_allocate == 0 && frame.nregs == 0;
4485 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4487 x86_64_sign_extended_value (rtx value)
4489 switch (GET_CODE (value))
4491 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4492 to be at least 32 and this all acceptable constants are
4493 represented as CONST_INT. */
4495 if (HOST_BITS_PER_WIDE_INT == 32)
4499 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4500 return trunc_int_for_mode (val, SImode) == val;
4504 /* For certain code models, the symbolic references are known to fit.
4505 in CM_SMALL_PIC model we know it fits if it is local to the shared
4506 library. Don't count TLS SYMBOL_REFs here, since they should fit
4507 only if inside of UNSPEC handled below. */
4509 /* TLS symbols are not constant. */
4510 if (tls_symbolic_operand (value, Pmode))
4512 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4514 /* For certain code models, the code is near as well. */
4516 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4517 || ix86_cmodel == CM_KERNEL);
4519 /* We also may accept the offsetted memory references in certain special
4522 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4523 switch (XINT (XEXP (value, 0), 1))
4525 case UNSPEC_GOTPCREL:
4527 case UNSPEC_GOTNTPOFF:
4533 if (GET_CODE (XEXP (value, 0)) == PLUS)
4535 rtx op1 = XEXP (XEXP (value, 0), 0);
4536 rtx op2 = XEXP (XEXP (value, 0), 1);
4537 HOST_WIDE_INT offset;
4539 if (ix86_cmodel == CM_LARGE)
4541 if (GET_CODE (op2) != CONST_INT)
4543 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4544 switch (GET_CODE (op1))
4547 /* For CM_SMALL assume that latest object is 16MB before
4548 end of 31bits boundary. We may also accept pretty
4549 large negative constants knowing that all objects are
4550 in the positive half of address space. */
4551 if (ix86_cmodel == CM_SMALL
4552 && offset < 16*1024*1024
4553 && trunc_int_for_mode (offset, SImode) == offset)
4555 /* For CM_KERNEL we know that all object resist in the
4556 negative half of 32bits address space. We may not
4557 accept negative offsets, since they may be just off
4558 and we may accept pretty large positive ones. */
4559 if (ix86_cmodel == CM_KERNEL
4561 && trunc_int_for_mode (offset, SImode) == offset)
4565 /* These conditions are similar to SYMBOL_REF ones, just the
4566 constraints for code models differ. */
4567 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4568 && offset < 16*1024*1024
4569 && trunc_int_for_mode (offset, SImode) == offset)
4571 if (ix86_cmodel == CM_KERNEL
4573 && trunc_int_for_mode (offset, SImode) == offset)
4577 switch (XINT (op1, 1))
4582 && trunc_int_for_mode (offset, SImode) == offset)
/* NOTE(review): this extract appears incomplete -- the embedded original
   line numbers jump, so interior lines (braces, case labels, returns) are
   missing.  Code kept byte-identical; comments only.  */
4596 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4598 x86_64_zero_extended_value (rtx value)
4600 switch (GET_CODE (value))
4603 if (HOST_BITS_PER_WIDE_INT == 32)
4604 return (GET_MODE (value) == VOIDmode
4605 && !CONST_DOUBLE_HIGH (value))
4609 if (HOST_BITS_PER_WIDE_INT == 32)
4610 return INTVAL (value) >= 0;
/* On 64-bit hosts, accept only values whose upper 32 bits are clear.  */
4612 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4615 /* For certain code models, the symbolic references are known to fit. */
4617 /* TLS symbols are not constant. */
4618 if (tls_symbolic_operand (value, Pmode))
4620 return ix86_cmodel == CM_SMALL;
4622 /* For certain code models, the code is near as well. */
4624 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4626 /* We also may accept the offsetted memory references in certain special
4629 if (GET_CODE (XEXP (value, 0)) == PLUS)
4631 rtx op1 = XEXP (XEXP (value, 0), 0);
4632 rtx op2 = XEXP (XEXP (value, 0), 1);
4634 if (ix86_cmodel == CM_LARGE)
4636 switch (GET_CODE (op1))
4640 /* For small code model we may accept pretty large positive
4641 offsets, since one bit is available for free. Negative
4642 offsets are limited by the size of NULL pointer area
4643 specified by the ABI. */
4644 if (ix86_cmodel == CM_SMALL
4645 && GET_CODE (op2) == CONST_INT
4646 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4647 && (trunc_int_for_mode (INTVAL (op2), SImode)
4650 /* ??? For the kernel, we may accept adjustment of
4651 -0x10000000, since we know that it will just convert
4652 negative address space to positive, but perhaps this
4653 is not worthwhile. */
4656 /* These conditions are similar to SYMBOL_REF ones, just the
4657 constraints for code models differ. */
4658 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4659 && GET_CODE (op2) == CONST_INT
4660 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4661 && (trunc_int_for_mode (INTVAL (op2), SImode)
/* NOTE(review): interior lines appear missing from this extract (embedded
   line numbers jump); code kept byte-identical, comments only.  */
4675 /* Value should be nonzero if functions must have frame pointers.
4676 Zero means the frame pointer need not be set up (and parms may
4677 be accessed via the stack pointer) in functions that seem suitable. */
4680 ix86_frame_pointer_required (void)
4682 /* If we accessed previous frames, then the generated code expects
4683 to be able to access the saved ebp value in our frame. */
4684 if (cfun->machine->accesses_prev_frame)
4687 /* Several x86 os'es need a frame pointer for other reasons,
4688 usually pertaining to setjmp. */
4689 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4692 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4693 the frame pointer by default. Turn it back on now if we've not
4694 got a leaf function. */
4695 if (TARGET_OMIT_LEAF_FRAME_POINTER
4696 && (!current_function_is_leaf))
/* Profiling also forces a frame pointer -- presumably so mcount can walk
   the stack; TODO confirm against the missing return statement.  */
4699 if (current_function_profile)
/* NOTE(review): surrounding braces/return type are missing from this
   extract; code kept byte-identical.  */
4705 /* Record that the current function accesses previous call frames. */
4708 ix86_setup_frame_addresses (void)
4710 cfun->machine->accesses_prev_frame = 1;
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
4713 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4714 # define USE_HIDDEN_LINKONCE 1
4716 # define USE_HIDDEN_LINKONCE 0
/* Bitmask: bit N set means a PC thunk for register N was requested
   (set in output_set_got, consumed in ix86_file_end).  */
4719 static int pic_labels_used;
4721 /* Fills in the label name that should be used for a pc thunk for
4722 the given register. */
4725 get_pc_thunk_name (char name[32], unsigned int regno)
4727 if (USE_HIDDEN_LINKONCE)
4728 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4730 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
/* NOTE(review): interior lines appear missing from this extract (embedded
   line numbers jump); code kept byte-identical, comments only.  */
4734 /* This function generates code for -fpic that loads %ebx with
4735 the return address of the caller and then returns. */
4738 ix86_file_end (void)
/* Emit one thunk per register whose bit is set in pic_labels_used.  */
4743 for (regno = 0; regno < 8; ++regno)
4747 if (! ((pic_labels_used >> regno) & 1))
4750 get_pc_thunk_name (name, regno);
4752 if (USE_HIDDEN_LINKONCE)
4756 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4758 TREE_PUBLIC (decl) = 1;
4759 TREE_STATIC (decl) = 1;
4760 DECL_ONE_ONLY (decl) = 1;
4762 (*targetm.asm_out.unique_section) (decl, 0);
4763 named_section (decl, NULL, 0);
4765 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4766 fputs ("\t.hidden\t", asm_out_file);
4767 assemble_name (asm_out_file, name);
4768 fputc ('\n', asm_out_file);
4769 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4774 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address from the stack into the target
   register, then return.  */
4777 xops[0] = gen_rtx_REG (SImode, regno);
4778 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4779 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4780 output_asm_insn ("ret", xops);
4783 if (NEED_INDICATE_EXEC_STACK)
4784 file_end_indicate_exec_stack ();
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
4787 /* Emit code for the SET_GOT patterns. */
4790 output_set_got (rtx dest)
4795 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4797 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* Classic call/pop sequence to obtain the PC in DEST.  */
4799 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4802 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4804 output_asm_insn ("call\t%a2", xops);
4807 /* Output the "canonical" label name ("Lxx$pb") here too. This
4808 is what will be referred to by the Mach-O PIC subsystem. */
4809 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4811 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4812 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4815 output_asm_insn ("pop{l}\t%0", xops);
/* Otherwise call a pc thunk, recording which register needs one so
   ix86_file_end emits it.  */
4820 get_pc_thunk_name (name, REGNO (dest));
4821 pic_labels_used |= 1 << REGNO (dest);
4823 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4824 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4825 output_asm_insn ("call\t%X2", xops);
4828 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4829 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4830 else if (!TARGET_MACHO)
4831 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
/* NOTE(review): the function header and MEM construction lines are missing
   from this extract; code kept byte-identical.  */
4836 /* Generate an "push" pattern for input ARG. */
4841 return gen_rtx_SET (VOIDmode,
4843 gen_rtx_PRE_DEC (Pmode,
4844 stack_pointer_rtx)),
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical.  */
4848 /* Return >= 0 if there is an unused call-clobbered register available
4849 for the entire function. */
4852 ix86_select_alt_pic_regnum (void)
4854 if (current_function_is_leaf && !current_function_profile)
/* Scan eax/ecx/edx (regnos 2..0) for one never used by the function.  */
4857 for (i = 2; i >= 0; --i)
4858 if (!regs_ever_live[i])
4862 return INVALID_REGNUM;
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
4865 /* Return 1 if we need to save REGNO. */
4867 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved if it is actually used, unless an
   alternate unused call-clobbered register can hold it instead.  */
4869 if (pic_offset_table_rtx
4870 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4871 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4872 || current_function_profile
4873 || current_function_calls_eh_return
4874 || current_function_uses_const_pool))
4876 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* For eh_return, the EH data registers also count as saved.  */
4881 if (current_function_calls_eh_return && maybe_eh_return)
4886 unsigned test = EH_RETURN_DATA_REGNO (i);
4887 if (test == INVALID_REGNUM)
4894 return (regs_ever_live[regno]
4895 && !call_used_regs[regno]
4896 && !fixed_regs[regno]
4897 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
/* NOTE(review): the counter declaration/increment lines are missing from
   this extract; code kept byte-identical.  */
4900 /* Return number of registers to be saved on the stack. */
4903 ix86_nsaved_regs (void)
4908 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4909 if (ix86_save_reg (regno, true))
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
4914 /* Return the offset between two registers, one to be eliminated, and the other
4915 its replacement, at the start of a routine. */
4918 ix86_initial_elimination_offset (int from, int to)
4920 struct ix86_frame frame;
/* All offsets are read from the computed frame layout.  */
4921 ix86_compute_frame_layout (&frame);
4923 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4924 return frame.hard_frame_pointer_offset;
4925 else if (from == FRAME_POINTER_REGNUM
4926 && to == HARD_FRAME_POINTER_REGNUM)
4927 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4930 if (to != STACK_POINTER_REGNUM)
4932 else if (from == ARG_POINTER_REGNUM)
4933 return frame.stack_pointer_offset;
4934 else if (from != FRAME_POINTER_REGNUM)
4937 return frame.stack_pointer_offset - frame.frame_pointer_offset;
/* NOTE(review): interior lines appear missing from this extract (embedded
   line numbers jump); code kept byte-identical, comments only.  */
4941 /* Fill structure ix86_frame about frame of currently computed function. */
4944 ix86_compute_frame_layout (struct ix86_frame *frame)
4946 HOST_WIDE_INT total_size;
4947 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4948 HOST_WIDE_INT offset;
4949 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4950 HOST_WIDE_INT size = get_frame_size ();
4952 frame->nregs = ix86_nsaved_regs ();
4955 /* During reload iteration the amount of registers saved can change.
4956 Recompute the value as needed. Do not recompute when amount of registers
4957 didn't change as reload does mutiple calls to the function and does not
4958 expect the decision to change within single iteration. */
4960 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4962 int count = frame->nregs;
4964 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4965 /* The fast prologue uses move instead of push to save registers. This
4966 is significantly longer, but also executes faster as modern hardware
4967 can execute the moves in parallel, but can't do that for push/pop.
4969 Be careful about choosing what prologue to emit: When function takes
4970 many instructions to execute we may use slow version as well as in
4971 case function is known to be outside hot spot (this is known with
4972 feedback only). Weight the size of function by number of registers
4973 to save as it is cheap to use one or two push instructions but very
4974 slow to use many of them. */
4976 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4977 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4978 || (flag_branch_probabilities
4979 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4980 cfun->machine->use_fast_prologue_epilogue = false;
4982 cfun->machine->use_fast_prologue_epilogue
4983 = !expensive_function_p (count);
4985 if (TARGET_PROLOGUE_USING_MOVE
4986 && cfun->machine->use_fast_prologue_epilogue)
4987 frame->save_regs_using_mov = true;
4989 frame->save_regs_using_mov = false;
4992 /* Skip return address and saved base pointer. */
4993 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4995 frame->hard_frame_pointer_offset = offset;
4997 /* Do some sanity checking of stack_alignment_needed and
4998 preferred_alignment, since i386 port is the only using those features
4999 that may break easily. */
5001 if (size && !stack_alignment_needed)
5003 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5005 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5007 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5010 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5011 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5013 /* Register save area */
5014 offset += frame->nregs * UNITS_PER_WORD;
5017 if (ix86_save_varrargs_registers)
5019 offset += X86_64_VARARGS_SIZE;
5020 frame->va_arg_size = X86_64_VARARGS_SIZE;
5023 frame->va_arg_size = 0;
5025 /* Align start of frame for local function. */
5026 frame->padding1 = ((offset + stack_alignment_needed - 1)
5027 & -stack_alignment_needed) - offset;
5029 offset += frame->padding1;
5031 /* Frame pointer points here. */
5032 frame->frame_pointer_offset = offset;
5036 /* Add outgoing arguments area. Can be skipped if we eliminated
5037 all the function calls as dead code.
5038 Skipping is however impossible when function calls alloca. Alloca
5039 expander assumes that last current_function_outgoing_args_size
5040 of stack frame are unused. */
5041 if (ACCUMULATE_OUTGOING_ARGS
5042 && (!current_function_is_leaf || current_function_calls_alloca))
5044 offset += current_function_outgoing_args_size;
5045 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5048 frame->outgoing_arguments_size = 0;
5050 /* Align stack boundary. Only needed if we're calling another function
5052 if (!current_function_is_leaf || current_function_calls_alloca)
5053 frame->padding2 = ((offset + preferred_alignment - 1)
5054 & -preferred_alignment) - offset;
5056 frame->padding2 = 0;
5058 offset += frame->padding2;
5060 /* We've reached end of stack frame. */
5061 frame->stack_pointer_offset = offset;
5063 /* Size prologue needs to allocate. */
5064 frame->to_allocate =
5065 (size + frame->padding1 + frame->padding2
5066 + frame->outgoing_arguments_size + frame->va_arg_size);
5068 if ((!frame->to_allocate && frame->nregs <= 1)
5069 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5070 frame->save_regs_using_mov = false;
/* Use the red zone (128 bytes below sp on x86-64) when the stack pointer
   is provably unchanged in a leaf function.  */
5072 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5073 && current_function_is_leaf)
5075 frame->red_zone_size = frame->to_allocate;
5076 if (frame->save_regs_using_mov)
5077 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5078 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5079 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5082 frame->red_zone_size = 0;
5083 frame->to_allocate -= frame->red_zone_size;
5084 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably guarded by a debug flag
   on a line missing from this extract -- TODO confirm).  */
5086 fprintf (stderr, "nregs: %i\n", frame->nregs);
5087 fprintf (stderr, "size: %i\n", size);
5088 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5089 fprintf (stderr, "padding1: %i\n", frame->padding1);
5090 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5091 fprintf (stderr, "padding2: %i\n", frame->padding2);
5092 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5093 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5094 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5095 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5096 frame->hard_frame_pointer_offset);
5097 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
/* NOTE(review): declaration lines are missing from this extract; code kept
   byte-identical.  */
5101 /* Emit code to save registers in the prologue. */
5104 ix86_emit_save_regs (void)
5109 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5110 if (ix86_save_reg (regno, true))
5112 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5113 RTX_FRAME_RELATED_P (insn) = 1;
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical.  NB: the comment below says "restored" but this routine
   SAVES registers -- presumably a copy/paste slip; TODO confirm.  */
5117 /* Emit code to save registers using MOV insns. First register
5118 is restored from POINTER + OFFSET. */
5120 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5125 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5126 if (ix86_save_reg (regno, true))
5128 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5130 gen_rtx_REG (Pmode, regno));
5131 RTX_FRAME_RELATED_P (insn) = 1;
5132 offset += UNITS_PER_WORD;
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
5136 /* Expand prologue or epilogue stack adjustment.
5137 The pattern exist to put a dependency on all ebp-based memory accesses.
5138 STYLE should be negative if instructions should be marked as frame related,
5139 zero if %r11 register is live and cannot be freely used and positive
5143 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5148 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5149 else if (x86_64_immediate_operand (offset, DImode))
5150 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5154 /* r11 is used by indirect sibcall return as well, set before the
5155 epilogue and used after the epilogue. ATM indirect sibcall
5156 shouldn't be used together with huge frame sizes in one
5157 function because of the frame_size check in sibcall.c. */
/* Offset doesn't fit an immediate: materialize it in r11 first.  */
5160 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5161 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5163 RTX_FRAME_RELATED_P (insn) = 1;
5164 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5168 RTX_FRAME_RELATED_P (insn) = 1;
/* NOTE(review): interior lines appear missing from this extract (embedded
   line numbers jump); code kept byte-identical, comments only.  */
5171 /* Expand the prologue into a bunch of separate insns. */
5174 ix86_expand_prologue (void)
5178 struct ix86_frame frame;
5179 HOST_WIDE_INT allocate;
5181 ix86_compute_frame_layout (&frame);
5183 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5184 slower on all targets. Also sdb doesn't like it. */
5186 if (frame_pointer_needed)
/* push %ebp; mov %esp, %ebp -- both marked frame-related for dwarf2.  */
5188 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5189 RTX_FRAME_RELATED_P (insn) = 1;
5191 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5192 RTX_FRAME_RELATED_P (insn) = 1;
5195 allocate = frame.to_allocate;
5197 if (!frame.save_regs_using_mov)
5198 ix86_emit_save_regs ();
5200 allocate += frame.nregs * UNITS_PER_WORD;
5202 /* When using red zone we may start register saving before allocating
5203 the stack frame saving one cycle of the prologue. */
5204 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5205 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5206 : stack_pointer_rtx,
5207 -frame.nregs * UNITS_PER_WORD);
5211 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5212 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5213 GEN_INT (-allocate), -1);
5216 /* Only valid for Win32. */
5217 rtx eax = gen_rtx_REG (SImode, 0);
5218 bool eax_live = ix86_eax_live_at_start_p ();
5225 emit_insn (gen_push (eax));
/* Large allocation with stack probing: pass the size in %eax to the
   allocate_stack_worker helper.  */
5229 insn = emit_move_insn (eax, GEN_INT (allocate));
5230 RTX_FRAME_RELATED_P (insn) = 1;
5232 insn = emit_insn (gen_allocate_stack_worker (eax));
5233 RTX_FRAME_RELATED_P (insn) = 1;
5237 rtx t = plus_constant (stack_pointer_rtx, allocate);
5238 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5242 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5244 if (!frame_pointer_needed || !frame.to_allocate)
5245 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5247 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5248 -frame.nregs * UNITS_PER_WORD);
5251 pic_reg_used = false;
5252 if (pic_offset_table_rtx
5253 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5254 || current_function_profile))
5256 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5258 if (alt_pic_reg_used != INVALID_REGNUM)
5259 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5261 pic_reg_used = true;
5266 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5268 /* Even with accurate pre-reload life analysis, we can wind up
5269 deleting all references to the pic register after reload.
5270 Consider if cross-jumping unifies two sides of a branch
5271 controlled by a comparison vs the only read from a global.
5272 In which case, allow the set_got to be deleted, though we're
5273 too late to do anything about the ebx save in the prologue. */
5274 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5277 /* Prevent function calls from be scheduled before the call to mcount.
5278 In the pic_reg_used case, make sure that the got load isn't deleted. */
5279 if (current_function_profile)
5280 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
5283 /* Emit code to restore saved registers using MOV insns. First register
5284 is restored from POINTER + OFFSET. */
5286 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5287 int maybe_eh_return)
5290 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5292 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5293 if (ix86_save_reg (regno, maybe_eh_return))
5295 /* Ensure that adjust_address won't be forced to produce pointer
5296 out of range allowed by x86-64 instruction set. */
5297 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset exceeds 32 bits: compute POINTER+OFFSET into r11 and rebase.  */
5301 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5302 emit_move_insn (r11, GEN_INT (offset));
5303 emit_insn (gen_adddi3 (r11, r11, pointer));
5304 base_address = gen_rtx_MEM (Pmode, r11);
5307 emit_move_insn (gen_rtx_REG (Pmode, regno),
5308 adjust_address (base_address, Pmode, offset));
5309 offset += UNITS_PER_WORD;
/* NOTE(review): interior lines appear missing from this extract (embedded
   line numbers jump); code kept byte-identical, comments only.  */
5313 /* Restore function stack, frame, and registers. */
5316 ix86_expand_epilogue (int style)
5319 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5320 struct ix86_frame frame;
5321 HOST_WIDE_INT offset;
5323 ix86_compute_frame_layout (&frame);
5325 /* Calculate start of saved registers relative to ebp. Special care
5326 must be taken for the normal return case of a function using
5327 eh_return: the eax and edx registers are marked as saved, but not
5328 restored along this path. */
5329 offset = frame.nregs;
5330 if (current_function_calls_eh_return && style != 2)
5332 offset *= -UNITS_PER_WORD;
5334 /* If we're only restoring one register and sp is not valid then
5335 using a move instruction to restore the register since it's
5336 less work than reloading sp and popping the register.
5338 The default code result in stack adjustment using add/lea instruction,
5339 while this code results in LEAVE instruction (or discrete equivalent),
5340 so it is profitable in some other cases as well. Especially when there
5341 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5342 and there is exactly one register to pop. This heuristic may need some
5343 tuning in future. */
5344 if ((!sp_valid && frame.nregs <= 1)
5345 || (TARGET_EPILOGUE_USING_MOVE
5346 && cfun->machine->use_fast_prologue_epilogue
5347 && (frame.nregs > 1 || frame.to_allocate))
5348 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5349 || (frame_pointer_needed && TARGET_USE_LEAVE
5350 && cfun->machine->use_fast_prologue_epilogue
5351 && frame.nregs == 1)
5352 || current_function_calls_eh_return)
5354 /* Restore registers. We can use ebp or esp to address the memory
5355 locations. If both are available, default to ebp, since offsets
5356 are known to be small. Only exception is esp pointing directly to the
5357 end of block of saved registers, where we may simplify addressing
5360 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5361 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5362 frame.to_allocate, style == 2)
5364 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5365 offset, style == 2);
5367 /* eh_return epilogues need %ecx added to the stack pointer. */
5370 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5372 if (frame_pointer_needed)
5374 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5375 tmp = plus_constant (tmp, UNITS_PER_WORD);
5376 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5378 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5379 emit_move_insn (hard_frame_pointer_rtx, tmp);
5381 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5386 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5387 tmp = plus_constant (tmp, (frame.to_allocate
5388 + frame.nregs * UNITS_PER_WORD));
5389 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5392 else if (!frame_pointer_needed)
5393 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5394 GEN_INT (frame.to_allocate
5395 + frame.nregs * UNITS_PER_WORD),
5397 /* If not an i386, mov & pop is faster than "leave". */
5398 else if (TARGET_USE_LEAVE || optimize_size
5399 || !cfun->machine->use_fast_prologue_epilogue)
5400 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5403 pro_epilogue_adjust_stack (stack_pointer_rtx,
5404 hard_frame_pointer_rtx,
5407 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5409 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5414 /* First step is to deallocate the stack frame so that we can
5415 pop the registers. */
5418 if (!frame_pointer_needed)
5420 pro_epilogue_adjust_stack (stack_pointer_rtx,
5421 hard_frame_pointer_rtx,
5422 GEN_INT (offset), style);
5424 else if (frame.to_allocate)
5425 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5426 GEN_INT (frame.to_allocate), style);
/* Pop path: restore registers with pop insns.  */
5428 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5429 if (ix86_save_reg (regno, false))
5432 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5434 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5436 if (frame_pointer_needed)
5438 /* Leave results in shorter dependency chains on CPUs that are
5439 able to grok it fast. */
5440 if (TARGET_USE_LEAVE)
5441 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5442 else if (TARGET_64BIT)
5443 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5445 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5449 /* Sibcall epilogues don't want a return instruction. */
5453 if (current_function_pops_args && current_function_args_size)
5455 rtx popc = GEN_INT (current_function_pops_args);
5457 /* i386 can only pop 64K bytes. If asked to pop more, pop
5458 return address, do explicit add, and jump indirectly to the
5461 if (current_function_pops_args >= 65536)
5463 rtx ecx = gen_rtx_REG (SImode, 2);
5465 /* There is no "pascal" calling convention in 64bit ABI. */
5469 emit_insn (gen_popsi1 (ecx));
5470 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5471 emit_jump_insn (gen_return_indirect_internal (ecx));
5474 emit_jump_insn (gen_return_pop_internal (popc));
5477 emit_jump_insn (gen_return_internal ());
/* NOTE(review): return-type/brace lines are missing from this extract;
   code kept byte-identical.  Undoes the PIC-register renaming done in
   ix86_expand_prologue (REGNO reassignment).  */
5480 /* Reset from the function's potential modifications. */
5483 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5484 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5486 if (pic_offset_table_rtx)
5487 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
/* NOTE(review): interior lines appear missing from this extract (embedded
   line numbers jump); code kept byte-identical, comments only.  */
5490 /* Extract the parts of an RTL expression that is a valid memory address
5491 for an instruction. Return 0 if the structure of the address is
5492 grossly off. Return -1 if the address contains ASHIFT, so it is not
5493 strictly valid, but still used for computing length of lea instruction. */
5496 ix86_decompose_address (rtx addr, struct ix86_address *out)
5498 rtx base = NULL_RTX;
5499 rtx index = NULL_RTX;
5500 rtx disp = NULL_RTX;
5501 HOST_WIDE_INT scale = 1;
5502 rtx scale_rtx = NULL_RTX;
5504 enum ix86_address_seg seg = SEG_DEFAULT;
5506 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5508 else if (GET_CODE (addr) == PLUS)
/* Flatten a chain of PLUS nodes into an addends array, then classify
   each addend as base, index*scale, segment unspec, or displacement.  */
5518 addends[n++] = XEXP (op, 1);
5521 while (GET_CODE (op) == PLUS);
5526 for (i = n; i >= 0; --i)
5529 switch (GET_CODE (op))
5534 index = XEXP (op, 0);
5535 scale_rtx = XEXP (op, 1);
5539 if (XINT (op, 1) == UNSPEC_TP
5540 && TARGET_TLS_DIRECT_SEG_REFS
5541 && seg == SEG_DEFAULT)
5542 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5571 else if (GET_CODE (addr) == MULT)
5573 index = XEXP (addr, 0); /* index*scale */
5574 scale_rtx = XEXP (addr, 1);
5576 else if (GET_CODE (addr) == ASHIFT)
5580 /* We're called for lea too, which implements ashift on occasion. */
5581 index = XEXP (addr, 0);
5582 tmp = XEXP (addr, 1);
5583 if (GET_CODE (tmp) != CONST_INT)
5585 scale = INTVAL (tmp);
5586 if ((unsigned HOST_WIDE_INT) scale > 3)
5592 disp = addr; /* displacement */
5594 /* Extract the integral value of scale. */
5597 if (GET_CODE (scale_rtx) != CONST_INT)
5599 scale = INTVAL (scale_rtx);
5602 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5603 if (base && index && scale == 1
5604 && (index == arg_pointer_rtx
5605 || index == frame_pointer_rtx
5606 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5613 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5614 if ((base == hard_frame_pointer_rtx
5615 || base == frame_pointer_rtx
5616 || base == arg_pointer_rtx) && !disp)
5619 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5620 Avoid this by transforming to [%esi+0]. */
5621 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5622 && base && !index && !disp
5624 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5627 /* Special case: encode reg+reg instead of reg*2. */
5628 if (!base && index && scale && scale == 2)
5629 base = index, scale = 1;
5631 /* Special case: scaling cannot be encoded without base or displacement. */
5632 if (!base && !disp && index && scale != 1)
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
5644 /* Return cost of the memory address x.
5645 For i386, it is better to use a complex address than let gcc copy
5646 the address into a reg and make a new pseudo. But not if the address
5647 requires to two regs - that would mean more pseudos with longer
5650 ix86_address_cost (rtx x)
5652 struct ix86_address parts;
5655 if (!ix86_decompose_address (x, &parts))
5658 /* More complex memory references are better. */
5659 if (parts.disp && parts.disp != const0_rtx)
5661 if (parts.seg != SEG_DEFAULT)
5664 /* Attempt to minimize number of registers in the address. */
5666 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5668 && (!REG_P (parts.index)
5669 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5673 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5675 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5676 && parts.base != parts.index)
5679 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5680 since it's predecode logic can't detect the length of instructions
5681 and it degenerates to vector decoded. Increase cost of such
5682 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5683 to split such addresses or even refuse such addresses at all.
5685 Following addressing modes are affected:
5690 The first and last case may be avoidable by explicitly coding the zero in
5691 memory address, but I don't have AMD-K6 machine handy to check this
5695 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5696 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5697 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
5703 /* If X is a machine specific address (i.e. a symbol or label being
5704 referenced as a displacement from the GOT implemented using an
5705 UNSPEC), then return the base term. Otherwise return X. */
5708 ix86_find_base_term (rtx x)
5714 if (GET_CODE (x) != CONST)
/* Strip an outer PLUS with a constant addend before inspecting the
   UNSPEC_GOTPCREL wrapper.  */
5717 if (GET_CODE (term) == PLUS
5718 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5719 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5720 term = XEXP (term, 0);
5721 if (GET_CODE (term) != UNSPEC
5722 || XINT (term, 1) != UNSPEC_GOTPCREL)
5725 term = XVECEXP (term, 0, 0);
5727 if (GET_CODE (term) != SYMBOL_REF
5728 && GET_CODE (term) != LABEL_REF)
/* Non-64-bit path (presumably): delegate to delegitimization.  */
5734 term = ix86_delegitimize_address (x);
5736 if (GET_CODE (term) != SYMBOL_REF
5737 && GET_CODE (term) != LABEL_REF)
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical, comments only.  */
5743 /* Determine if a given RTX is a valid constant. We already know this
5744 satisfies CONSTANT_P. */
5747 legitimate_constant_p (rtx x)
5751 switch (GET_CODE (x))
5754 /* TLS symbols are not constant. */
5755 if (tls_symbolic_operand (x, Pmode))
5760 inner = XEXP (x, 0);
5762 /* Offsets of TLS symbols are never valid.
5763 Discourage CSE from creating them. */
5764 if (GET_CODE (inner) == PLUS
5765 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5768 if (GET_CODE (inner) == PLUS)
5770 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5772 inner = XEXP (inner, 0);
5775 /* Only some unspecs are valid as "constants". */
5776 if (GET_CODE (inner) == UNSPEC)
5777 switch (XINT (inner, 1))
5781 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5783 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5793 /* Otherwise we handle everything else in the move patterns. */
/* NOTE(review): return-type/brace lines missing from this extract; code
   kept byte-identical.  Simply the negation of legitimate_constant_p.  */
5797 /* Determine if it's legal to put X into the constant pool. This
5798 is not possible for the address of thread-local symbols, which
5799 is checked above. */
5802 ix86_cannot_force_const_mem (rtx x)
5804 return !legitimate_constant_p (x);
/* NOTE(review): return-type/brace lines missing from this extract; code
   kept byte-identical.  Strict (reload-completed) legitimacy check.  */
5807 /* Determine if a given RTX is a valid constant address. */
5810 constant_address_p (rtx x)
5812 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
/* NOTE(review): interior lines appear missing from this extract; code kept
   byte-identical.  */
5815 /* Nonzero if the constant value X is a legitimate general operand
5816 when generating PIC code. It is given that flag_pic is on and
5817 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5820 legitimate_pic_operand_p (rtx x)
5824 switch (GET_CODE (x))
5827 inner = XEXP (x, 0);
5829 /* Only some unspecs are valid as "constants". */
5830 if (GET_CODE (inner) == UNSPEC)
5831 switch (XINT (inner, 1))
5834 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
/* SYMBOL_REF/LABEL_REF (presumably) fall through to the displacement
   check -- TODO confirm against the missing case labels.  */
5842 return legitimate_pic_address_disp_p (x);
/* NOTE(review): interior lines appear missing from this extract (embedded
   line numbers jump); code kept byte-identical, comments only.  */
5849 /* Determine if a given CONST RTX is a valid memory displacement
5853 legitimate_pic_address_disp_p (rtx disp)
5857 /* In 64bit mode we can allow direct addresses of symbols and labels
5858 when they are not dynamic symbols. */
5861 /* TLS references should always be enclosed in UNSPEC. */
5862 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5864 if (GET_CODE (disp) == SYMBOL_REF
5865 && ix86_cmodel == CM_SMALL_PIC
5866 && SYMBOL_REF_LOCAL_P (disp))
5868 if (GET_CODE (disp) == LABEL_REF)
5870 if (GET_CODE (disp) == CONST
5871 && GET_CODE (XEXP (disp, 0)) == PLUS)
5873 rtx op0 = XEXP (XEXP (disp, 0), 0);
5874 rtx op1 = XEXP (XEXP (disp, 0), 1);
5876 /* TLS references should always be enclosed in UNSPEC. */
5877 if (tls_symbolic_operand (op0, GET_MODE (op0)))
/* Local symbol/label plus an offset within +/-16MB is acceptable.  */
5879 if (((GET_CODE (op0) == SYMBOL_REF
5880 && ix86_cmodel == CM_SMALL_PIC
5881 && SYMBOL_REF_LOCAL_P (op0))
5882 || GET_CODE (op0) == LABEL_REF)
5883 && GET_CODE (op1) == CONST_INT
5884 && INTVAL (op1) < 16*1024*1024
5885 && INTVAL (op1) >= -16*1024*1024)
5889 if (GET_CODE (disp) != CONST)
5891 disp = XEXP (disp, 0);
5895 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5896 of GOT tables. We should not need these anyway. */
5897 if (GET_CODE (disp) != UNSPEC
5898 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5901 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5902 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5908 if (GET_CODE (disp) == PLUS)
5910 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5912 disp = XEXP (disp, 0);
5916 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5917 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5919 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5920 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5921 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5923 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5924 if (! strcmp (sym_name, "<pic base>"))
5929 if (GET_CODE (disp) != UNSPEC)
5932 switch (XINT (disp, 1))
5937 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5939 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5940 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5941 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5943 case UNSPEC_GOTTPOFF:
5944 case UNSPEC_GOTNTPOFF:
5945 case UNSPEC_INDNTPOFF:
5948 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5950 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5952 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5958 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5959 memory address for an instruction. The MODE argument is the machine mode
5960 for the MEM expression that wants to use this address.
5962 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5963 convert common non-canonical forms to canonical form so that they will
5967 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5969 struct ix86_address parts;
5970 rtx base, index, disp;
5971 HOST_WIDE_INT scale;
5972 const char *reason = NULL;
5973 rtx reason_rtx = NULL_RTX;
5975 if (TARGET_DEBUG_ADDR)
5978 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5979 GET_MODE_NAME (mode), strict);
5983 if (ix86_decompose_address (addr, &parts) <= 0)
5985 reason = "decomposition failed";
5990 index = parts.index;
5992 scale = parts.scale;
5994 /* Validate base register.
5996 Don't allow SUBREG's here, it can lead to spill failures when the base
5997 is one word out of a two word structure, which is represented internally
6004 if (GET_CODE (base) != REG)
6006 reason = "base is not a register";
6010 if (GET_MODE (base) != Pmode)
6012 reason = "base is not in Pmode";
6016 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6017 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6019 reason = "base is not valid";
6024 /* Validate index register.
6026 Don't allow SUBREG's here, it can lead to spill failures when the index
6027 is one word out of a two word structure, which is represented internally
6034 if (GET_CODE (index) != REG)
6036 reason = "index is not a register";
6040 if (GET_MODE (index) != Pmode)
6042 reason = "index is not in Pmode";
6046 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6047 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6049 reason = "index is not valid";
6054 /* Validate scale factor. */
6057 reason_rtx = GEN_INT (scale);
6060 reason = "scale without index";
6064 if (scale != 2 && scale != 4 && scale != 8)
6066 reason = "scale is not a valid multiplier";
6071 /* Validate displacement. */
6076 if (GET_CODE (disp) == CONST
6077 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6078 switch (XINT (XEXP (disp, 0), 1))
6082 case UNSPEC_GOTPCREL:
6085 goto is_legitimate_pic;
6087 case UNSPEC_GOTTPOFF:
6088 case UNSPEC_GOTNTPOFF:
6089 case UNSPEC_INDNTPOFF:
6095 reason = "invalid address unspec";
6099 else if (flag_pic && (SYMBOLIC_CONST (disp)
6101 && !machopic_operand_p (disp)
6106 if (TARGET_64BIT && (index || base))
6108 /* foo@dtpoff(%rX) is ok. */
6109 if (GET_CODE (disp) != CONST
6110 || GET_CODE (XEXP (disp, 0)) != PLUS
6111 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6112 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6113 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6114 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6116 reason = "non-constant pic memory reference";
6120 else if (! legitimate_pic_address_disp_p (disp))
6122 reason = "displacement is an invalid pic construct";
6126 /* This code used to verify that a symbolic pic displacement
6127 includes the pic_offset_table_rtx register.
6129 While this is good idea, unfortunately these constructs may
6130 be created by "adds using lea" optimization for incorrect
6139 This code is nonsensical, but results in addressing
6140 GOT table with pic_offset_table_rtx base. We can't
6141 just refuse it easily, since it gets matched by
6142 "addsi3" pattern, that later gets split to lea in the
6143 case output register differs from input. While this
6144 can be handled by separate addsi pattern for this case
6145 that never results in lea, this seems to be easier and
6146 correct fix for crash to disable this test. */
6148 else if (GET_CODE (disp) != LABEL_REF
6149 && GET_CODE (disp) != CONST_INT
6150 && (GET_CODE (disp) != CONST
6151 || !legitimate_constant_p (disp))
6152 && (GET_CODE (disp) != SYMBOL_REF
6153 || !legitimate_constant_p (disp)))
6155 reason = "displacement is not constant";
6158 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6160 reason = "displacement is out of range";
6165 /* Everything looks valid. */
6166 if (TARGET_DEBUG_ADDR)
6167 fprintf (stderr, "Success.\n");
6171 if (TARGET_DEBUG_ADDR)
6173 fprintf (stderr, "Error: %s\n", reason);
6174 debug_rtx (reason_rtx);
6179 /* Return an unique alias set for the GOT. */
6181 static HOST_WIDE_INT
6182 ix86_GOT_alias_set (void)
6184 static HOST_WIDE_INT set = -1;
6186 set = new_alias_set ();
6190 /* Return a legitimate reference for ORIG (an address) using the
6191 register REG. If REG is 0, a new pseudo is generated.
6193 There are two types of references that must be handled:
6195 1. Global data references must load the address from the GOT, via
6196 the PIC reg. An insn is emitted to do this load, and the reg is
6199 2. Static data references, constant pool addresses, and code labels
6200 compute the address as an offset from the GOT, whose base is in
6201 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6202 differentiate them from global data objects. The returned
6203 address is the PIC reg + an unspec constant.
6205 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6206 reg also appears in the address. */
6209 legitimize_pic_address (rtx orig, rtx reg)
6217 reg = gen_reg_rtx (Pmode);
6218 /* Use the generic Mach-O PIC machinery. */
6219 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6222 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6224 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6226 /* This symbol may be referenced via a displacement from the PIC
6227 base address (@GOTOFF). */
6229 if (reload_in_progress)
6230 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6231 if (GET_CODE (addr) == CONST)
6232 addr = XEXP (addr, 0);
6233 if (GET_CODE (addr) == PLUS)
6235 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6236 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6239 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6240 new = gen_rtx_CONST (Pmode, new);
6241 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6245 emit_move_insn (reg, new);
6249 else if (GET_CODE (addr) == SYMBOL_REF)
6253 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6254 new = gen_rtx_CONST (Pmode, new);
6255 new = gen_rtx_MEM (Pmode, new);
6256 RTX_UNCHANGING_P (new) = 1;
6257 set_mem_alias_set (new, ix86_GOT_alias_set ());
6260 reg = gen_reg_rtx (Pmode);
6261 /* Use directly gen_movsi, otherwise the address is loaded
6262 into register for CSE. We don't want to CSE this addresses,
6263 instead we CSE addresses from the GOT table, so skip this. */
6264 emit_insn (gen_movsi (reg, new));
6269 /* This symbol must be referenced via a load from the
6270 Global Offset Table (@GOT). */
6272 if (reload_in_progress)
6273 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6274 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6275 new = gen_rtx_CONST (Pmode, new);
6276 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6277 new = gen_rtx_MEM (Pmode, new);
6278 RTX_UNCHANGING_P (new) = 1;
6279 set_mem_alias_set (new, ix86_GOT_alias_set ());
6282 reg = gen_reg_rtx (Pmode);
6283 emit_move_insn (reg, new);
6289 if (GET_CODE (addr) == CONST)
6291 addr = XEXP (addr, 0);
6293 /* We must match stuff we generate before. Assume the only
6294 unspecs that can get here are ours. Not that we could do
6295 anything with them anyway.... */
6296 if (GET_CODE (addr) == UNSPEC
6297 || (GET_CODE (addr) == PLUS
6298 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6300 if (GET_CODE (addr) != PLUS)
6303 if (GET_CODE (addr) == PLUS)
6305 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6307 /* Check first to see if this is a constant offset from a @GOTOFF
6308 symbol reference. */
6309 if (local_symbolic_operand (op0, Pmode)
6310 && GET_CODE (op1) == CONST_INT)
6314 if (reload_in_progress)
6315 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6316 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6318 new = gen_rtx_PLUS (Pmode, new, op1);
6319 new = gen_rtx_CONST (Pmode, new);
6320 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6324 emit_move_insn (reg, new);
6330 if (INTVAL (op1) < -16*1024*1024
6331 || INTVAL (op1) >= 16*1024*1024)
6332 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6337 base = legitimize_pic_address (XEXP (addr, 0), reg);
6338 new = legitimize_pic_address (XEXP (addr, 1),
6339 base == reg ? NULL_RTX : reg);
6341 if (GET_CODE (new) == CONST_INT)
6342 new = plus_constant (base, INTVAL (new));
6345 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6347 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6348 new = XEXP (new, 1);
6350 new = gen_rtx_PLUS (Pmode, base, new);
6358 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6361 get_thread_pointer (int to_reg)
6365 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6369 reg = gen_reg_rtx (Pmode);
6370 insn = gen_rtx_SET (VOIDmode, reg, tp);
6371 insn = emit_insn (insn);
6376 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6377 false if we expect this to be used for a memory address and true if
6378 we expect to load the address into a register. */
6381 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6383 rtx dest, base, off, pic;
6388 case TLS_MODEL_GLOBAL_DYNAMIC:
6389 dest = gen_reg_rtx (Pmode);
6392 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6395 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6396 insns = get_insns ();
6399 emit_libcall_block (insns, dest, rax, x);
6402 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6405 case TLS_MODEL_LOCAL_DYNAMIC:
6406 base = gen_reg_rtx (Pmode);
6409 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6412 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6413 insns = get_insns ();
6416 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6417 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6418 emit_libcall_block (insns, base, rax, note);
6421 emit_insn (gen_tls_local_dynamic_base_32 (base));
6423 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6424 off = gen_rtx_CONST (Pmode, off);
6426 return gen_rtx_PLUS (Pmode, base, off);
6428 case TLS_MODEL_INITIAL_EXEC:
6432 type = UNSPEC_GOTNTPOFF;
6436 if (reload_in_progress)
6437 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6438 pic = pic_offset_table_rtx;
6439 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6441 else if (!TARGET_GNU_TLS)
6443 pic = gen_reg_rtx (Pmode);
6444 emit_insn (gen_set_got (pic));
6445 type = UNSPEC_GOTTPOFF;
6450 type = UNSPEC_INDNTPOFF;
6453 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6454 off = gen_rtx_CONST (Pmode, off);
6456 off = gen_rtx_PLUS (Pmode, pic, off);
6457 off = gen_rtx_MEM (Pmode, off);
6458 RTX_UNCHANGING_P (off) = 1;
6459 set_mem_alias_set (off, ix86_GOT_alias_set ());
6461 if (TARGET_64BIT || TARGET_GNU_TLS)
6463 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6464 off = force_reg (Pmode, off);
6465 return gen_rtx_PLUS (Pmode, base, off);
6469 base = get_thread_pointer (true);
6470 dest = gen_reg_rtx (Pmode);
6471 emit_insn (gen_subsi3 (dest, base, off));
6475 case TLS_MODEL_LOCAL_EXEC:
6476 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6477 (TARGET_64BIT || TARGET_GNU_TLS)
6478 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6479 off = gen_rtx_CONST (Pmode, off);
6481 if (TARGET_64BIT || TARGET_GNU_TLS)
6483 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6484 return gen_rtx_PLUS (Pmode, base, off);
6488 base = get_thread_pointer (true);
6489 dest = gen_reg_rtx (Pmode);
6490 emit_insn (gen_subsi3 (dest, base, off));
6501 /* Try machine-dependent ways of modifying an illegitimate address
6502 to be legitimate. If we find one, return the new, valid address.
6503 This macro is used in only one place: `memory_address' in explow.c.
6505 OLDX is the address as it was before break_out_memory_refs was called.
6506 In some cases it is useful to look at this to decide what needs to be done.
6508 MODE and WIN are passed so that this macro can use
6509 GO_IF_LEGITIMATE_ADDRESS.
6511 It is always safe for this macro to do nothing. It exists to recognize
6512 opportunities to optimize the output.
6514 For the 80386, we handle X+REG by loading X into a register R and
6515 using R+REG. R will go in a general reg and indexing will be used.
6516 However, if REG is a broken-out memory address or multiplication,
6517 nothing needs to be done because REG can certainly go in a general reg.
6519 When -fpic is used, special handling is needed for symbolic references.
6520 See comments by legitimize_pic_address in i386.c for details. */
6523 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6528 if (TARGET_DEBUG_ADDR)
6530 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6531 GET_MODE_NAME (mode));
6535 log = tls_symbolic_operand (x, mode);
6537 return legitimize_tls_address (x, log, false);
6539 if (flag_pic && SYMBOLIC_CONST (x))
6540 return legitimize_pic_address (x, 0);
6542 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6543 if (GET_CODE (x) == ASHIFT
6544 && GET_CODE (XEXP (x, 1)) == CONST_INT
6545 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6548 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6549 GEN_INT (1 << log));
6552 if (GET_CODE (x) == PLUS)
6554 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6556 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6557 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6558 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6561 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6562 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6563 GEN_INT (1 << log));
6566 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6567 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6568 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6571 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6572 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6573 GEN_INT (1 << log));
6576 /* Put multiply first if it isn't already. */
6577 if (GET_CODE (XEXP (x, 1)) == MULT)
6579 rtx tmp = XEXP (x, 0);
6580 XEXP (x, 0) = XEXP (x, 1);
6585 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6586 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6587 created by virtual register instantiation, register elimination, and
6588 similar optimizations. */
6589 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6592 x = gen_rtx_PLUS (Pmode,
6593 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6594 XEXP (XEXP (x, 1), 0)),
6595 XEXP (XEXP (x, 1), 1));
6599 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6600 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6601 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6602 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6603 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6604 && CONSTANT_P (XEXP (x, 1)))
6607 rtx other = NULL_RTX;
6609 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6611 constant = XEXP (x, 1);
6612 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6614 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6616 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6617 other = XEXP (x, 1);
6625 x = gen_rtx_PLUS (Pmode,
6626 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6627 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6628 plus_constant (other, INTVAL (constant)));
6632 if (changed && legitimate_address_p (mode, x, FALSE))
6635 if (GET_CODE (XEXP (x, 0)) == MULT)
6638 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6641 if (GET_CODE (XEXP (x, 1)) == MULT)
6644 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6648 && GET_CODE (XEXP (x, 1)) == REG
6649 && GET_CODE (XEXP (x, 0)) == REG)
6652 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6655 x = legitimize_pic_address (x, 0);
6658 if (changed && legitimate_address_p (mode, x, FALSE))
6661 if (GET_CODE (XEXP (x, 0)) == REG)
6663 rtx temp = gen_reg_rtx (Pmode);
6664 rtx val = force_operand (XEXP (x, 1), temp);
6666 emit_move_insn (temp, val);
6672 else if (GET_CODE (XEXP (x, 1)) == REG)
6674 rtx temp = gen_reg_rtx (Pmode);
6675 rtx val = force_operand (XEXP (x, 0), temp);
6677 emit_move_insn (temp, val);
6687 /* Print an integer constant expression in assembler syntax. Addition
6688 and subtraction are the only arithmetic that may appear in these
6689 expressions. FILE is the stdio stream to write to, X is the rtx, and
6690 CODE is the operand print code from the output string. */
6693 output_pic_addr_const (FILE *file, rtx x, int code)
6697 switch (GET_CODE (x))
6707 assemble_name (file, XSTR (x, 0));
6708 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6709 fputs ("@PLT", file);
6716 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6717 assemble_name (asm_out_file, buf);
6721 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6725 /* This used to output parentheses around the expression,
6726 but that does not work on the 386 (either ATT or BSD assembler). */
6727 output_pic_addr_const (file, XEXP (x, 0), code);
6731 if (GET_MODE (x) == VOIDmode)
6733 /* We can use %d if the number is <32 bits and positive. */
6734 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6735 fprintf (file, "0x%lx%08lx",
6736 (unsigned long) CONST_DOUBLE_HIGH (x),
6737 (unsigned long) CONST_DOUBLE_LOW (x));
6739 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6742 /* We can't handle floating point constants;
6743 PRINT_OPERAND must handle them. */
6744 output_operand_lossage ("floating constant misused");
6748 /* Some assemblers need integer constants to appear first. */
6749 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6751 output_pic_addr_const (file, XEXP (x, 0), code);
6753 output_pic_addr_const (file, XEXP (x, 1), code);
6755 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6757 output_pic_addr_const (file, XEXP (x, 1), code);
6759 output_pic_addr_const (file, XEXP (x, 0), code);
6767 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6768 output_pic_addr_const (file, XEXP (x, 0), code);
6770 output_pic_addr_const (file, XEXP (x, 1), code);
6772 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6776 if (XVECLEN (x, 0) != 1)
6778 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6779 switch (XINT (x, 1))
6782 fputs ("@GOT", file);
6785 fputs ("@GOTOFF", file);
6787 case UNSPEC_GOTPCREL:
6788 fputs ("@GOTPCREL(%rip)", file);
6790 case UNSPEC_GOTTPOFF:
6791 /* FIXME: This might be @TPOFF in Sun ld too. */
6792 fputs ("@GOTTPOFF", file);
6795 fputs ("@TPOFF", file);
6799 fputs ("@TPOFF", file);
6801 fputs ("@NTPOFF", file);
6804 fputs ("@DTPOFF", file);
6806 case UNSPEC_GOTNTPOFF:
6808 fputs ("@GOTTPOFF(%rip)", file);
6810 fputs ("@GOTNTPOFF", file);
6812 case UNSPEC_INDNTPOFF:
6813 fputs ("@INDNTPOFF", file);
6816 output_operand_lossage ("invalid UNSPEC as operand");
6822 output_operand_lossage ("invalid expression as operand");
6826 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6827 We need to handle our special PIC relocations. */
6830 i386_dwarf_output_addr_const (FILE *file, rtx x)
6833 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6837 fprintf (file, "%s", ASM_LONG);
6840 output_pic_addr_const (file, x, '\0');
6842 output_addr_const (file, x);
6846 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6847 We need to emit DTP-relative relocations. */
6850 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6852 fputs (ASM_LONG, file);
6853 output_addr_const (file, x);
6854 fputs ("@DTPOFF", file);
6860 fputs (", 0", file);
6867 /* In the name of slightly smaller debug output, and to cater to
6868 general assembler lossage, recognize PIC+GOTOFF and turn it back
6869 into a direct symbol reference. */
6872 ix86_delegitimize_address (rtx orig_x)
6876 if (GET_CODE (x) == MEM)
6881 if (GET_CODE (x) != CONST
6882 || GET_CODE (XEXP (x, 0)) != UNSPEC
6883 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6884 || GET_CODE (orig_x) != MEM)
6886 return XVECEXP (XEXP (x, 0), 0, 0);
6889 if (GET_CODE (x) != PLUS
6890 || GET_CODE (XEXP (x, 1)) != CONST)
6893 if (GET_CODE (XEXP (x, 0)) == REG
6894 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6895 /* %ebx + GOT/GOTOFF */
6897 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6899 /* %ebx + %reg * scale + GOT/GOTOFF */
6901 if (GET_CODE (XEXP (y, 0)) == REG
6902 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6904 else if (GET_CODE (XEXP (y, 1)) == REG
6905 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6909 if (GET_CODE (y) != REG
6910 && GET_CODE (y) != MULT
6911 && GET_CODE (y) != ASHIFT)
6917 x = XEXP (XEXP (x, 1), 0);
6918 if (GET_CODE (x) == UNSPEC
6919 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6920 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6923 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6924 return XVECEXP (x, 0, 0);
6927 if (GET_CODE (x) == PLUS
6928 && GET_CODE (XEXP (x, 0)) == UNSPEC
6929 && GET_CODE (XEXP (x, 1)) == CONST_INT
6930 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6931 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6932 && GET_CODE (orig_x) != MEM)))
6934 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6936 return gen_rtx_PLUS (Pmode, y, x);
6944 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6949 if (mode == CCFPmode || mode == CCFPUmode)
6951 enum rtx_code second_code, bypass_code;
6952 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6953 if (bypass_code != NIL || second_code != NIL)
6955 code = ix86_fp_compare_code_to_integer (code);
6959 code = reverse_condition (code);
6970 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6975 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6976 Those same assemblers have the same but opposite losage on cmov. */
6979 suffix = fp ? "nbe" : "a";
6982 if (mode == CCNOmode || mode == CCGOCmode)
6984 else if (mode == CCmode || mode == CCGCmode)
6995 if (mode == CCNOmode || mode == CCGOCmode)
6997 else if (mode == CCmode || mode == CCGCmode)
7006 suffix = fp ? "nb" : "ae";
7009 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7019 suffix = fp ? "u" : "p";
7022 suffix = fp ? "nu" : "np";
7027 fputs (suffix, file);
7030 /* Print the name of register X to FILE based on its machine mode and number.
7031 If CODE is 'w', pretend the mode is HImode.
7032 If CODE is 'b', pretend the mode is QImode.
7033 If CODE is 'k', pretend the mode is SImode.
7034 If CODE is 'q', pretend the mode is DImode.
7035 If CODE is 'h', pretend the reg is the `high' byte register.
7036 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7039 print_reg (rtx x, int code, FILE *file)
7041 if (REGNO (x) == ARG_POINTER_REGNUM
7042 || REGNO (x) == FRAME_POINTER_REGNUM
7043 || REGNO (x) == FLAGS_REG
7044 || REGNO (x) == FPSR_REG)
7047 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7050 if (code == 'w' || MMX_REG_P (x))
7052 else if (code == 'b')
7054 else if (code == 'k')
7056 else if (code == 'q')
7058 else if (code == 'y')
7060 else if (code == 'h')
7063 code = GET_MODE_SIZE (GET_MODE (x));
7065 /* Irritatingly, AMD extended registers use different naming convention
7066 from the normal registers. */
7067 if (REX_INT_REG_P (x))
7074 error ("extended registers have no high halves");
7077 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7080 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7083 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7086 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7089 error ("unsupported operand size for extended register");
7097 if (STACK_TOP_P (x))
7099 fputs ("st(0)", file);
7106 if (! ANY_FP_REG_P (x))
7107 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7112 fputs (hi_reg_name[REGNO (x)], file);
7115 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7117 fputs (qi_reg_name[REGNO (x)], file);
7120 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7122 fputs (qi_high_reg_name[REGNO (x)], file);
7129 /* Locate some local-dynamic symbol still in use by this function
7130 so that we can print its name in some tls_local_dynamic_base
7134 get_some_local_dynamic_name (void)
7138 if (cfun->machine->some_ld_name)
7139 return cfun->machine->some_ld_name;
7141 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7143 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7144 return cfun->machine->some_ld_name;
7150 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7154 if (GET_CODE (x) == SYMBOL_REF
7155 && local_dynamic_symbolic_operand (x, Pmode))
7157 cfun->machine->some_ld_name = XSTR (x, 0);
7165 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7166 C -- print opcode suffix for set/cmov insn.
7167 c -- like C, but print reversed condition
7168 F,f -- likewise, but for floating-point.
7169 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7171 R -- print the prefix for register names.
7172 z -- print the opcode suffix for the size of the current operand.
7173 * -- print a star (in certain assembler syntax)
7174 A -- print an absolute memory reference.
7175 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7176 s -- print a shift double count, followed by the assemblers argument
7178 b -- print the QImode name of the register for the indicated operand.
7179 %b0 would print %al if operands[0] is reg 0.
7180 w -- likewise, print the HImode name of the register.
7181 k -- likewise, print the SImode name of the register.
7182 q -- likewise, print the DImode name of the register.
7183 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7184 y -- print "st(0)" instead of "st" as a register.
7185 D -- print condition for SSE cmp instruction.
7186 P -- if PIC, print an @PLT suffix.
7187 X -- don't print any sort of PIC '@' suffix for a symbol.
7188 & -- print some in-use local-dynamic symbol name.
7192 print_operand (FILE *file, rtx x, int code)
7199 if (ASSEMBLER_DIALECT == ASM_ATT)
7204 assemble_name (file, get_some_local_dynamic_name ());
7208 if (ASSEMBLER_DIALECT == ASM_ATT)
7210 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7212 /* Intel syntax. For absolute addresses, registers should not
7213 be surrounded by braces. */
7214 if (GET_CODE (x) != REG)
7217 PRINT_OPERAND (file, x, 0);
7225 PRINT_OPERAND (file, x, 0);
7230 if (ASSEMBLER_DIALECT == ASM_ATT)
7235 if (ASSEMBLER_DIALECT == ASM_ATT)
7240 if (ASSEMBLER_DIALECT == ASM_ATT)
7245 if (ASSEMBLER_DIALECT == ASM_ATT)
7250 if (ASSEMBLER_DIALECT == ASM_ATT)
7255 if (ASSEMBLER_DIALECT == ASM_ATT)
7260 /* 387 opcodes don't get size suffixes if the operands are
7262 if (STACK_REG_P (x))
7265 /* Likewise if using Intel opcodes. */
7266 if (ASSEMBLER_DIALECT == ASM_INTEL)
7269 /* This is the size of op from size of operand. */
7270 switch (GET_MODE_SIZE (GET_MODE (x)))
7273 #ifdef HAVE_GAS_FILDS_FISTS
7279 if (GET_MODE (x) == SFmode)
7294 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7296 #ifdef GAS_MNEMONICS
7322 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7324 PRINT_OPERAND (file, x, 0);
7330 /* Little bit of braindamage here. The SSE compare instructions
7331 does use completely different names for the comparisons that the
7332 fp conditional moves. */
7333 switch (GET_CODE (x))
7348 fputs ("unord", file);
7352 fputs ("neq", file);
7356 fputs ("nlt", file);
7360 fputs ("nle", file);
7363 fputs ("ord", file);
7371 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7372 if (ASSEMBLER_DIALECT == ASM_ATT)
7374 switch (GET_MODE (x))
7376 case HImode: putc ('w', file); break;
7378 case SFmode: putc ('l', file); break;
7380 case DFmode: putc ('q', file); break;
7388 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7391 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7392 if (ASSEMBLER_DIALECT == ASM_ATT)
7395 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7398 /* Like above, but reverse condition */
7400 /* Check to see if argument to %c is really a constant
7401 and not a condition code which needs to be reversed. */
7402 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7404 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7407 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7410 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7411 if (ASSEMBLER_DIALECT == ASM_ATT)
7414 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7420 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7423 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7426 int pred_val = INTVAL (XEXP (x, 0));
7428 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7429 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7431 int taken = pred_val > REG_BR_PROB_BASE / 2;
7432 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7434 /* Emit hints only in the case default branch prediction
7435 heuristics would fail. */
7436 if (taken != cputaken)
7438 /* We use 3e (DS) prefix for taken branches and
7439 2e (CS) prefix for not taken branches. */
7441 fputs ("ds ; ", file);
7443 fputs ("cs ; ", file);
7450 output_operand_lossage ("invalid operand code `%c'", code);
7454 if (GET_CODE (x) == REG)
7455 print_reg (x, code, file);
7457 else if (GET_CODE (x) == MEM)
7459 /* No `byte ptr' prefix for call instructions. */
7460 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7463 switch (GET_MODE_SIZE (GET_MODE (x)))
7465 case 1: size = "BYTE"; break;
7466 case 2: size = "WORD"; break;
7467 case 4: size = "DWORD"; break;
7468 case 8: size = "QWORD"; break;
7469 case 12: size = "XWORD"; break;
7470 case 16: size = "XMMWORD"; break;
7475 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7478 else if (code == 'w')
7480 else if (code == 'k')
7484 fputs (" PTR ", file);
7488 /* Avoid (%rip) for call operands. */
7489 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7490 && GET_CODE (x) != CONST_INT)
7491 output_addr_const (file, x);
7492 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7493 output_operand_lossage ("invalid constraints for operand");
7498 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7503 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7504 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7506 if (ASSEMBLER_DIALECT == ASM_ATT)
7508 fprintf (file, "0x%08lx", l);
7511 /* These float cases don't actually occur as immediate operands. */
7512 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7516 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7517 fprintf (file, "%s", dstr);
7520 else if (GET_CODE (x) == CONST_DOUBLE
7521 && GET_MODE (x) == XFmode)
7525 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7526 fprintf (file, "%s", dstr);
7533 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7535 if (ASSEMBLER_DIALECT == ASM_ATT)
7538 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7539 || GET_CODE (x) == LABEL_REF)
7541 if (ASSEMBLER_DIALECT == ASM_ATT)
7544 fputs ("OFFSET FLAT:", file);
7547 if (GET_CODE (x) == CONST_INT)
7548 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7550 output_pic_addr_const (file, x, code);
7552 output_addr_const (file, x);
/* NOTE(review): this is a sampled listing -- interior source lines are
   missing between the numbered lines below (e.g. 7557-7558, 7563-7564,
   7566-7568).  Treat as an excerpt, not compilable code.  */
7556 /* Print a memory operand whose address is ADDR. */
7559 print_operand_address (FILE *file, rtx addr)
7561 struct ix86_address parts;
7562 rtx base, index, disp;
/* Decompose ADDR into base/index/displacement/scale/segment parts;
   the bail-out path on failure is in an elided line.  */
7565 if (! ix86_decompose_address (addr, &parts))
7569 index = parts.index;
7571 scale = parts.scale;
/* Emit an fs:/gs: segment override when the decomposed address carries
   a non-default segment.  */
7579 if (USER_LABEL_PREFIX[0] == 0)
7581 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
/* Case 1: displacement-only address (no base and no index register).  */
7587 if (!base && !index)
7589 /* Displacement only requires special attention. */
7591 if (GET_CODE (disp) == CONST_INT)
/* Intel syntax needs an explicit ds: prefix on a bare constant address
   so the assembler does not mistake it for an immediate.  */
7593 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7595 if (USER_LABEL_PREFIX[0] == 0)
7597 fputs ("ds:", file);
7599 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7602 output_pic_addr_const (file, disp, 0);
7604 output_addr_const (file, disp);
7606 /* Use one byte shorter RIP relative addressing for 64bit mode. */
/* RIP-relative is only safe for non-TLS symbols, labels, and
   symbol/label-plus-constant CONST expressions.  The guarding condition
   before the && (presumably a TARGET_64BIT test -- line 7607 elided)
   is not visible here.  */
7608 && ((GET_CODE (disp) == SYMBOL_REF
7609 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7610 || GET_CODE (disp) == LABEL_REF
7611 || (GET_CODE (disp) == CONST
7612 && GET_CODE (XEXP (disp, 0)) == PLUS
7613 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7614 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7615 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7616 fputs ("(%rip)", file);
/* Case 2 (AT&T dialect): print "disp(base,index,scale)".  */
7620 if (ASSEMBLER_DIALECT == ASM_ATT)
7625 output_pic_addr_const (file, disp, 0);
7626 else if (GET_CODE (disp) == LABEL_REF)
7627 output_asm_label (disp);
7629 output_addr_const (file, disp);
7634 print_reg (base, 0, file);
7638 print_reg (index, 0, file);
7640 fprintf (file, ",%d", scale);
/* Case 3 (Intel dialect): print "[base+index*scale+offset]"; a symbolic
   displacement is printed first, its constant offset pulled out below.  */
7646 rtx offset = NULL_RTX;
7650 /* Pull out the offset of a symbol; print any symbol itself. */
7651 if (GET_CODE (disp) == CONST
7652 && GET_CODE (XEXP (disp, 0)) == PLUS
7653 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7655 offset = XEXP (XEXP (disp, 0), 1);
7656 disp = gen_rtx_CONST (VOIDmode,
7657 XEXP (XEXP (disp, 0), 0));
7661 output_pic_addr_const (file, disp, 0);
7662 else if (GET_CODE (disp) == LABEL_REF)
7663 output_asm_label (disp);
7664 else if (GET_CODE (disp) == CONST_INT)
7667 output_addr_const (file, disp);
7673 print_reg (base, 0, file);
/* Sign-aware printing of the pulled-out constant offset; the '+'/'-'
   emission lines between these two fprintfs are elided.  */
7676 if (INTVAL (offset) >= 0)
7678 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7682 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7689 print_reg (index, 0, file);
7691 fprintf (file, "*%d", scale);
/* Print TLS-related UNSPEC address constants (@GOTTPOFF, @TPOFF, @NTPOFF,
   @DTPOFF, @GOTNTPOFF, @INDNTPOFF relocations) that output_addr_const
   itself cannot handle.  NOTE(review): sampled listing -- the case labels
   for some branches (e.g. lines 7713-7714, 7717-7718) are elided, so the
   mapping of suffix to UNSPEC below each visible case is partly inferred. */
7699 output_addr_const_extra (FILE *file, rtx x)
7703 if (GET_CODE (x) != UNSPEC)
7706 op = XVECEXP (x, 0, 0);
7707 switch (XINT (x, 1))
7709 case UNSPEC_GOTTPOFF:
7710 output_addr_const (file, op);
7711 /* FIXME: This might be @TPOFF in Sun ld. */
7712 fputs ("@GOTTPOFF", file);
7715 output_addr_const (file, op);
7716 fputs ("@TPOFF", file);
7719 output_addr_const (file, op);
/* Two alternative suffixes here; the selecting condition (elided line
   7720/7722) presumably distinguishes 64-bit from 32-bit TLS models --
   TODO confirm against full source.  */
7721 fputs ("@TPOFF", file);
7723 fputs ("@NTPOFF", file);
7726 output_addr_const (file, op);
7727 fputs ("@DTPOFF", file);
7729 case UNSPEC_GOTNTPOFF:
7730 output_addr_const (file, op);
7732 fputs ("@GOTTPOFF(%rip)", file);
7734 fputs ("@GOTNTPOFF", file);
7736 case UNSPEC_INDNTPOFF:
7737 output_addr_const (file, op);
7738 fputs ("@INDNTPOFF", file);
7748 /* Split one or more DImode RTL references into pairs of SImode
7749 references. The RTL can be REG, offsettable MEM, integer constant, or
7750 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7751 split and "num" is its length. lo_half and hi_half are output arrays
7752 that parallel "operands". */
/* NOTE(review): sampled listing -- the loop header over "num" (lines
   7756-7758) and the else introducing the subreg path are elided.  */
7755 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7759 rtx op = operands[num];
7761 /* simplify_subreg refuses to split volatile memory addresses,
7762 but we still have to handle it. */
7763 if (GET_CODE (op) == MEM)
/* MEM path: low word at byte offset 0, high word at byte offset 4.  */
7765 lo_half[num] = adjust_address (op, SImode, 0);
7766 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM path: use subregs; VOIDmode constants are treated as DImode.  */
7770 lo_half[num] = simplify_gen_subreg (SImode, op,
7771 GET_MODE (op) == VOIDmode
7772 ? DImode : GET_MODE (op), 0);
7773 hi_half[num] = simplify_gen_subreg (SImode, op,
7774 GET_MODE (op) == VOIDmode
7775 ? DImode : GET_MODE (op), 4);
7779 /* Split one or more TImode RTL references into pairs of SImode
7780 references. The RTL can be REG, offsettable MEM, integer constant, or
7781 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7782 split and "num" is its length. lo_half and hi_half are output arrays
7783 that parallel "operands". */
/* NOTE(review): the header comment above looks copy-pasted from split_di:
   the code below actually splits TImode into DImode halves ("SImode" and
   "DImode" in the comment appear stale).  Sampled listing -- loop header
   lines are elided.  */
7786 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7790 rtx op = operands[num];
7792 /* simplify_subreg refuses to split volatile memory addresses, but we
7793 still have to handle it. */
7794 if (GET_CODE (op) == MEM)
/* MEM path: DImode halves at byte offsets 0 and 8.  */
7796 lo_half[num] = adjust_address (op, DImode, 0);
7797 hi_half[num] = adjust_address (op, DImode, 8);
7801 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7802 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7807 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7808 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7809 is the expression of the binary operation. The output may either be
7810 emitted here, or returned to the caller, like all output_* functions.
7812 There is no guarantee that the operands are the same mode, as they
7813 might be within FLOAT or FLOAT_EXTEND expressions. */
7815 #ifndef SYSV386_COMPAT
7816 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7817 wants to fix the assemblers because that causes incompatibility
7818 with gcc. No-one wants to fix gcc because that causes
7819 incompatibility with assemblers... You can use the option of
7820 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7821 #define SYSV386_COMPAT 1
/* NOTE(review): sampled listing -- many lines (mnemonic selection into
   `buf`, abort() calls, final strcat/return) are elided below; the
   visible code only shows the operand-order/suffix selection logic.  */
7825 output_387_binary_op (rtx insn, rtx *operands)
7827 static char buf[30];
/* `is_sse` is true when any operand lives in an SSE register; the SSE
   path emits addss/addsd-style two-operand forms (see lines 7893-7896). */
7830 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7832 #ifdef ENABLE_CHECKING
7833 /* Even if we do not want to check the inputs, this documents input
7834 constraints. Which helps in understanding the following code. */
7835 if (STACK_REG_P (operands[0])
7836 && ((REG_P (operands[1])
7837 && REGNO (operands[0]) == REGNO (operands[1])
7838 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7839 || (REG_P (operands[2])
7840 && REGNO (operands[0]) == REGNO (operands[2])
7841 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7842 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First switch: pick the base mnemonic; integer-mode operands select the
   fi* (integer-operand) variants.  The strcpy calls are elided.  */
7848 switch (GET_CODE (operands[3]))
7851 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7852 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7860 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7861 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7869 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7870 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7878 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7879 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: pick ss/sd suffix by scalar float mode.  */
7893 if (GET_MODE (operands[0]) == SFmode)
7894 strcat (buf, "ss\t{%2, %0|%0, %2}");
7896 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* Second switch: choose the operand template `p` for the x87 path.  */
7901 switch (GET_CODE (operands[3]))
/* Commutative ops (PLUS/MULT): canonicalize so op0 matches op1.  */
7905 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7907 rtx temp = operands[2];
7908 operands[2] = operands[1];
7912 /* know operands[0] == operands[1]. */
7914 if (GET_CODE (operands[2]) == MEM)
7920 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7922 if (STACK_TOP_P (operands[0]))
7923 /* How is it that we are storing to a dead operand[2]?
7924 Well, presumably operands[1] is dead too. We can't
7925 store the result to st(0) as st(0) gets popped on this
7926 instruction. Instead store to operands[2] (which I
7927 think has to be st(1)). st(1) will be popped later.
7928 gcc <= 2.8.1 didn't have this check and generated
7929 assembly code that the Unixware assembler rejected. */
7930 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7932 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7936 if (STACK_TOP_P (operands[0]))
7937 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7939 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): memory-operand and dead-register
   cases, with SYSV386_COMPAT working around assemblers that swap the
   direction of fsub{r}/fdiv{r} for non-st(0) destinations.  */
7944 if (GET_CODE (operands[1]) == MEM)
7950 if (GET_CODE (operands[2]) == MEM)
7956 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7959 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7960 derived assemblers, confusingly reverse the direction of
7961 the operation for fsub{r} and fdiv{r} when the
7962 destination register is not st(0). The Intel assembler
7963 doesn't have this brain damage. Read !SYSV386_COMPAT to
7964 figure out what the hardware really does. */
7965 if (STACK_TOP_P (operands[0]))
7966 p = "{p\t%0, %2|rp\t%2, %0}";
7968 p = "{rp\t%2, %0|p\t%0, %2}";
7970 if (STACK_TOP_P (operands[0]))
7971 /* As above for fmul/fadd, we can't store to st(0). */
7972 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7974 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7979 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7982 if (STACK_TOP_P (operands[0]))
7983 p = "{rp\t%0, %1|p\t%1, %0}";
7985 p = "{p\t%1, %0|rp\t%0, %1}";
7987 if (STACK_TOP_P (operands[0]))
7988 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7990 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7995 if (STACK_TOP_P (operands[0]))
7997 if (STACK_TOP_P (operands[1]))
7998 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8000 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8003 else if (STACK_TOP_P (operands[1]))
8006 p = "{\t%1, %0|r\t%0, %1}";
8008 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8014 p = "{r\t%2, %0|\t%0, %2}";
8016 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8029 /* Output code to initialize control word copies used by
8030 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8031 is set to control word rounding downwards. */
/* NOTE(review): sampled listing.  Stores the current x87 control word via
   fnstcw, then produces a copy with rounding-control bits (0xc00, i.e.
   round-toward-zero for truncation) set, either by an insv of 0xc into
   the RC field or by OR-ing 0xc00 in.  The condition completing the
   `if` (line 8040) is elided.  */
8033 emit_i387_cw_initialization (rtx normal, rtx round_down)
8035 rtx reg = gen_reg_rtx (HImode);
8037 emit_insn (gen_x86_fnstcw_1 (normal));
8038 emit_move_insn (reg, normal);
8039 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8041 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)))
;
8043 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8044 emit_move_insn (round_down, reg);
8047 /* Output code for INSN to convert a float to a signed int. OPERANDS
8048 are the insn operands. The output may be [HSD]Imode and the input
8049 operand may be [SDX]Fmode. */
/* NOTE(review): sampled listing -- the abort() bodies after the two
   sanity checks and the final return are elided.  */
8052 output_fix_trunc (rtx insn, rtx *operands)
8054 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8055 int dimode_p = GET_MODE (operands[0]) == DImode;
8057 /* Jump through a hoop or two for DImode, since the hardware has no
8058 non-popping instruction. We used to do this a different way, but
8059 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory popping fistp still leaves the
   value live when the stack top does not die.  */
8060 if (dimode_p && !stack_top_dies)
8061 output_asm_insn ("fld\t%y1", operands);
8063 if (!STACK_TOP_P (operands[1]))
8066 if (GET_CODE (operands[0]) != MEM)
/* Switch to the truncating control word (%3), store, then restore the
   original control word (%2).  */
8069 output_asm_insn ("fldcw\t%3", operands);
8070 if (stack_top_dies || dimode_p)
8071 output_asm_insn ("fistp%z0\t%0", operands);
8073 output_asm_insn ("fist%z0\t%0", operands);
8074 output_asm_insn ("fldcw\t%2", operands);
8079 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8080 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8081 when fucom should be used. */
/* NOTE(review): sampled listing -- abort() bodies, the fcompp/fucompp
   eflags_p branches, and part of the template table (the non-eflags
   ficom variants and final `return alt[mask]`-style lookup) are elided. */
8084 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8087 rtx cmp_op0 = operands[0];
8088 rtx cmp_op1 = operands[1];
8089 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8094 cmp_op1 = operands[2];
/* SSE path: [u]comiss / [u]comisd chosen by mode and unordered_p.  */
8098 if (GET_MODE (operands[0]) == SFmode)
8100 return "ucomiss\t{%1, %0|%0, %1}";
8102 return "comiss\t{%1, %0|%0, %1}";
8105 return "ucomisd\t{%1, %0|%0, %1}";
8107 return "comisd\t{%1, %0|%0, %1}";
8110 if (! STACK_TOP_P (cmp_op0))
8113 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8115 if (STACK_REG_P (cmp_op1)
8117 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8118 && REGNO (cmp_op1) != FIRST_STACK_REG)
8120 /* If both the top of the 387 stack dies, and the other operand
8121 is also a stack register that dies, then this must be a
8122 `fcompp' float compare */
8126 /* There is no double popping fcomi variant. Fortunately,
8127 eflags is immune from the fstp's cc clobbering. */
8129 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8131 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8139 return "fucompp\n\tfnstsw\t%0";
8141 return "fcompp\n\tfnstsw\t%0";
8154 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8156 static const char * const alt[24] =
8168 "fcomi\t{%y1, %0|%0, %y1}",
8169 "fcomip\t{%y1, %0|%0, %y1}",
8170 "fucomi\t{%y1, %0|%0, %y1}",
8171 "fucomip\t{%y1, %0|%0, %y1}",
8178 "fcom%z2\t%y2\n\tfnstsw\t%0",
8179 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8180 "fucom%z2\t%y2\n\tfnstsw\t%0",
8181 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8183 "ficom%z2\t%y2\n\tfnstsw\t%0",
8184 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into `alt` exactly as the comment at 8154 says. */
8192 mask = eflags_p << 3;
8193 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8194 mask |= unordered_p << 1;
8195 mask |= stack_top_dies;
/* Emit one element of a jump-table address vector: ".long Lnn" (or
   ".quad" -- the condition selecting ASM_QUAD, lines 8211-8214, is
   elided in this sampled listing).  */
8208 ix86_output_addr_vec_elt (FILE *file, int value)
8210 const char *directive = ASM_LONG;
8215 directive = ASM_QUAD;
8221 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a relative (PIC) jump table: a label difference,
   a @GOTOFF reference, or a GOT-symbol-relative expression depending on
   target/assembler support.  NOTE(review): sampled listing -- the
   conditions at 8227/8230/8233 are partially elided.  */
8225 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8228 fprintf (file, "%s%s%d-%s%d\n",
8229 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8230 else if (HAVE_AS_GOTOFF_IN_DATA)
8231 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8233 else if (TARGET_MACHO)
8235 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8236 machopic_output_function_base_name (file);
8237 fprintf(file, "\n");
8241 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8242 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8245 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* (rest of the introductory comment, line 8246, elided).  */
8249 ix86_expand_clear (rtx dest)
8253 /* We play register width games, which are only valid after reload. */
8254 if (!reload_completed)
8257 /* Avoid HImode and its attendant prefix byte. */
/* Widening an 8/16-bit clear to SImode is safe only post-reload (checked
   above), since it writes the whole hard register.  */
8258 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8259 dest = gen_rtx_REG (SImode, REGNO (dest));
8261 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8263 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
/* The xor form clobbers the flags (hard reg 17), so wrap in a PARALLEL. */
8264 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8266 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8267 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8273 /* X is an unchanging MEM. If it is a constant pool reference, return
8274 the constant pool rtx, else NULL. */
/* Strip any PIC/legitimization wrapping from the address first, then
   look it up in the constant pool.  (The NULL return, line ~8283, is
   elided in this sampled listing.)  */
8277 maybe_get_pool_constant (rtx x)
8279 x = ix86_delegitimize_address (XEXP (x, 0));
8281 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8282 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] into operands[0],
   legitimizing TLS and PIC references, forcing mem-to-mem moves through
   a register, and spilling awkward constants.  NOTE(review): sampled
   listing -- op0/op1 initialization, several braces, and the Darwin
   #if TARGET_MACHO structure are partially elided.  */
8288 ix86_expand_move (enum machine_mode mode, rtx operands[])
8290 int strict = (reload_in_progress || reload_completed);
8292 enum tls_model model;
/* TLS symbols are rewritten through legitimize_tls_address first.  */
8297 model = tls_symbolic_operand (op1, Pmode);
8300 op1 = legitimize_tls_address (op1, model, true);
8301 op1 = force_operand (op1, op0);
8306 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
/* Darwin (Mach-O) PIC path: indirect data references plus machopic
   legitimization; reuse op0 as the temp when it is already a register. */
8311 rtx temp = ((reload_in_progress
8312 || ((op0 && GET_CODE (op0) == REG)
8314 ? op0 : gen_reg_rtx (Pmode));
8315 op1 = machopic_indirect_data_reference (op1, temp);
8316 op1 = machopic_legitimize_pic_address (op1, mode,
8317 temp == op1 ? 0 : temp);
8319 else if (MACHOPIC_INDIRECT)
8320 op1 = machopic_indirect_data_reference (op1, 0);
8324 if (GET_CODE (op0) == MEM)
8325 op1 = force_reg (Pmode, op1);
/* Generic (ELF) PIC path: legitimize through a register temp.  */
8329 if (GET_CODE (temp) != REG)
8330 temp = gen_reg_rtx (Pmode);
8331 temp = legitimize_pic_address (op1, temp);
8336 #endif /* TARGET_MACHO */
/* x86 has no mem-to-mem move; copy the source into a register unless
   this is a push that PUSH_ROUNDING can handle directly.  */
8340 if (GET_CODE (op0) == MEM
8341 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8342 || !push_operand (op0, mode))
8343 && GET_CODE (op1) == MEM)
8344 op1 = force_reg (mode, op1);
8346 if (push_operand (op0, mode)
8347 && ! general_no_elim_operand (op1, mode))
8348 op1 = copy_to_mode_reg (mode, op1);
8350 /* Force large constants in 64bit compilation into register
8351 to get them CSEed. */
8352 if (TARGET_64BIT && mode == DImode
8353 && immediate_operand (op1, mode)
8354 && !x86_64_zero_extended_value (op1)
8355 && !register_operand (op0, mode)
8356 && optimize && !reload_completed && !reload_in_progress)
8357 op1 = copy_to_mode_reg (mode, op1);
8359 if (FLOAT_MODE_P (mode))
8361 /* If we are loading a floating point constant to a register,
8362 force the value to memory now, since we'll get better code
8363 out the back end. */
8367 else if (GET_CODE (op1) == CONST_DOUBLE)
8369 op1 = validize_mem (force_const_mem (mode, op1));
8370 if (!register_operand (op0, mode))
8372 rtx temp = gen_reg_rtx (mode);
8373 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8374 emit_move_insn (op0, temp);
8381 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move; like ix86_expand_move but with the extra
   rule that nonzero vector constants are spilled to the constant pool.
   NOTE(review): sampled listing -- the condition opening the second
   `if` (line 8397, presumably the mem-to-mem test) is elided.  */
8385 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8387 /* Force constants other than zero into memory. We do not know how
8388 the instructions used to build constants modify the upper 64 bits
8389 of the register, once we have that information we may be able
8390 to handle some of them more efficiently. */
8391 if ((reload_in_progress | reload_completed) == 0
8392 && register_operand (operands[0], mode)
8393 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8394 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8396 /* Make operand1 a register if it isn't already. */
8398 && !register_operand (operands[0], mode)
8399 && !register_operand (operands[1], mode))
8401 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8402 emit_move_insn (operands[0], temp);
8406 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8409 /* Attempt to expand a binary operator. Make the expansion closer to the
8410 actual machine, then just general_operand, which will allow 3 separate
8411 memory references (one output, two input) in a single insn. */
/* NOTE(review): sampled listing -- dst/src1/src2 initialization (lines
   8419-8423), the swap body at 8428-8433, and the non-PLUS abort/emit
   around 8482-8487 are elided.  */
8414 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8417 int matching_memory;
8418 rtx src1, src2, dst, op, clob;
8424 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8425 if (GET_RTX_CLASS (code) == 'c'
8426 && (rtx_equal_p (dst, src2)
8427 || immediate_operand (src1, mode)))
8434 /* If the destination is memory, and we do not have matching source
8435 operands, do things in registers. */
8436 matching_memory = 0;
8437 if (GET_CODE (dst) == MEM)
8439 if (rtx_equal_p (dst, src1))
8440 matching_memory = 1;
8441 else if (GET_RTX_CLASS (code) == 'c'
8442 && rtx_equal_p (dst, src2))
8443 matching_memory = 2;
8445 dst = gen_reg_rtx (mode);
8448 /* Both source operands cannot be in memory. */
8449 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
/* Keep the source that matches the memory destination; force the other
   into a register.  */
8451 if (matching_memory != 2)
8452 src2 = force_reg (mode, src2);
8454 src1 = force_reg (mode, src1);
8457 /* If the operation is not commutable, source 1 cannot be a constant
8458 or non-matching memory. */
8459 if ((CONSTANT_P (src1)
8460 || (!matching_memory && GET_CODE (src1) == MEM))
8461 && GET_RTX_CLASS (code) != 'c')
8462 src1 = force_reg (mode, src1);
8464 /* If optimizing, copy to regs to improve CSE */
8465 if (optimize && ! no_new_pseudos)
8467 if (GET_CODE (dst) == MEM)
8468 dst = gen_reg_rtx (mode);
8469 if (GET_CODE (src1) == MEM)
8470 src1 = force_reg (mode, src1);
8471 if (GET_CODE (src2) == MEM)
8472 src2 = force_reg (mode, src2);
8475 /* Emit the instruction. */
8477 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8478 if (reload_in_progress)
8480 /* Reload doesn't know about the flags register, and doesn't know that
8481 it doesn't want to clobber it. We can only do this with PLUS. */
/* Normal case: emit the SET together with an explicit flags clobber so
   later passes know EFLAGS is destroyed.  */
8488 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8489 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8492 /* Fix up the destination if needed. */
8493 if (dst != operands[0])
8494 emit_move_insn (operands[0], dst);
8497 /* Return TRUE or FALSE depending on whether the binary operator meets the
8498 appropriate constraints. */
/* NOTE(review): sampled listing -- the `return 0;`/`return 1;` bodies of
   each check (and the operands parameter declaration line) are elided;
   each visible `if` rejects one illegal operand combination.  */
8501 ix86_binary_operator_ok (enum rtx_code code,
8502 enum machine_mode mode ATTRIBUTE_UNUSED,
8505 /* Both source operands cannot be in memory. */
8506 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8508 /* If the operation is not commutable, source 1 cannot be a constant. */
8509 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8511 /* If the destination is memory, we must have a matching source operand. */
8512 if (GET_CODE (operands[0]) == MEM
8513 && ! (rtx_equal_p (operands[0], operands[1])
8514 || (GET_RTX_CLASS (code) == 'c'
8515 && rtx_equal_p (operands[0], operands[2]))))
8517 /* If the operation is not commutable and the source 1 is memory, we must
8518 have a matching destination. */
8519 if (GET_CODE (operands[1]) == MEM
8520 && GET_RTX_CLASS (code) != 'c'
8521 && ! rtx_equal_p (operands[0], operands[1]))
8526 /* Attempt to expand a unary operator. Make the expansion closer to the
8527 actual machine, then just general_operand, which will allow 2 separate
8528 memory references (one output, one input) in a single insn. */
/* NOTE(review): sampled listing -- dst/src initialization (lines
   8536-8539) and the NOT special-case emit around 8571-8576 are elided. */
8531 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8534 int matching_memory;
8535 rtx src, dst, op, clob;
8540 /* If the destination is memory, and we do not have matching source
8541 operands, do things in registers. */
8542 matching_memory = 0;
8543 if (GET_CODE (dst) == MEM)
8545 if (rtx_equal_p (dst, src))
8546 matching_memory = 1;
8548 dst = gen_reg_rtx (mode);
8551 /* When source operand is memory, destination must match. */
8552 if (!matching_memory && GET_CODE (src) == MEM)
8553 src = force_reg (mode, src);
8555 /* If optimizing, copy to regs to improve CSE */
8556 if (optimize && ! no_new_pseudos)
8558 if (GET_CODE (dst) == MEM)
8559 dst = gen_reg_rtx (mode);
8560 if (GET_CODE (src) == MEM)
8561 src = force_reg (mode, src);
8564 /* Emit the instruction. */
8566 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags, hence it shares the no-clobber emit path
   with the reload_in_progress case.  */
8567 if (reload_in_progress || code == NOT)
8569 /* Reload doesn't know about the flags register, and doesn't know that
8570 it doesn't want to clobber it. */
8577 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8578 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8581 /* Fix up the destination if needed. */
8582 if (dst != operands[0])
8583 emit_move_insn (operands[0], dst);
8586 /* Return TRUE or FALSE depending on whether the unary operator meets the
8587 appropriate constraints. */
/* NOTE(review): sampled listing -- the return statements are elided.  */
8590 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8591 enum machine_mode mode ATTRIBUTE_UNUSED,
8592 rtx operands[2] ATTRIBUTE_UNUSED)
8594 /* If one of operands is memory, source and destination must match. */
8595 if ((GET_CODE (operands[0]) == MEM
8596 || GET_CODE (operands[1]) == MEM)
8597 && ! rtx_equal_p (operands[0], operands[1]))
8602 /* Return TRUE or FALSE depending on whether the first SET in INSN
8603 has source and destination with matching CC modes, and that the
8604 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): sampled listing -- the switch over set_mode and the
   per-case acceptance logic (lines 8618-8647) are mostly elided; only
   the per-mode requirement checks are visible.  */
8607 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8610 enum machine_mode set_mode;
8612 set = PATTERN (insn);
8613 if (GET_CODE (set) == PARALLEL)
8614 set = XVECEXP (set, 0, 0);
8615 if (GET_CODE (set) != SET)
8617 if (GET_CODE (SET_SRC (set)) != COMPARE)
8620 set_mode = GET_MODE (SET_DEST (set));
/* CCNOmode is acceptable only for CCNOmode requests or for plain CCmode
   compares against zero.  */
8624 if (req_mode != CCNOmode
8625 && (req_mode != CCmode
8626 || XEXP (SET_SRC (set), 1) != const0_rtx))
8630 if (req_mode == CCGCmode)
8634 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8638 if (req_mode == CCZmode)
8648 return (GET_MODE (SET_SRC (set)) == set_mode);
8651 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits a COMPARE into the flags register in the CC mode chosen by
   SELECT_CC_MODE, and returns the comparison rtx the flags consumer
   (bcc/scc/cmov) should use.  */
8654 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8656 enum machine_mode cmpmode;
8659 cmpmode = SELECT_CC_MODE (code, op0, op1);
8660 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8662 /* This is very simple, but making the interface the same as in the
8663 FP case makes the rest of the code easier. */
8664 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8665 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8667 /* Return the test that should be put into the flags user, i.e.
8668 the bcc, scc, or cmov instruction. */
8669 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8672 /* Figure out whether to use ordered or unordered fp comparisons.
8673 Return the appropriate mode to use. */
8676 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8678 /* ??? In order to make all comparisons reversible, we do all comparisons
8679 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8680 all forms trapping and nontrapping comparisons, we can make inequality
8681 comparisons trapping again, since it results in better code when using
8682 FCOM based compares. */
/* CODE is currently ignored: IEEE mode forces the unordered (CCFPU)
   mode unconditionally.  */
8683 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode for comparing OP0 and OP1 with CODE.
   Float compares defer to ix86_fp_compare_mode; for integers the comment
   on each case documents which EFLAGS bits the branch condition reads.
   NOTE(review): sampled listing -- the returned mode for each case group
   (CCZmode/CCmode/CCGOCmode/CCGCmode lines) is elided.  */
8687 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8689 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8690 return ix86_fp_compare_mode (code);
8693 /* Only zero flag is needed. */
8695 case NE: /* ZF!=0 */
8697 /* Codes needing carry flag. */
8698 case GEU: /* CF=0 */
8699 case GTU: /* CF=0 & ZF=0 */
8700 case LTU: /* CF=1 */
8701 case LEU: /* CF=1 | ZF=1 */
8703 /* Codes possibly doable only with sign flag when
8704 comparing against zero. */
8705 case GE: /* SF=OF or SF=0 */
8706 case LT: /* SF<>OF or SF=1 */
8707 if (op1 == const0_rtx)
8710 /* For other cases Carry flag is not required. */
8712 /* Codes doable only with sign flag when comparing
8713 against zero, but we miss jump instruction for it
8714 so we need to use relational tests against overflow
8715 that thus needs to be zero. */
8716 case GT: /* ZF=0 & SF=OF */
8717 case LE: /* ZF=1 | SF<>OF */
8718 if (op1 == const0_rtx)
8722 /* strcmp pattern do (use flags) and combine may ask us for proper
8731 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body (assignments to *p1/*p2 and the return, lines
   8735-8740) is entirely elided in this sampled listing.  */
8734 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8741 /* If two condition code modes are compatible, return a condition code
8742 mode which is compatible with both. Otherwise, return
/* (continuation of the comment, line 8743-8744, elided).  */
8745 static enum machine_mode
8746 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* NOTE(review): the equal-modes fast path and the large switch merging
   specific CC modes (lines 8747-8788) are mostly elided.  */
8751 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8754 if ((m1 == CCGCmode && m2 == CCGOCmode)
8755 || (m1 == CCGOCmode && m2 == CCGCmode))
8783 /* These are only compatible with themselves, which we already
8789 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* True when fcomi is the cheapest strategy for either CODE or its
   swapped form, per the ix86_fp_comparison_*_cost metrics below.  */
8792 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8794 enum rtx_code swapped_code = swap_condition (code);
8795 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8796 || (ix86_fp_comparison_cost (swapped_code)
8797 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8800 /* Swap, force into registers, or otherwise massage the two operands
8801 to a fp comparison. The operands are updated in place; the new
8802 comparison code is returned. */
/* NOTE(review): sampled listing -- several else/brace lines and the
   final writeback of op0/op1 through pop0/pop1 (around 8862-8868) are
   elided.  */
8804 static enum rtx_code
8805 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8807 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8808 rtx op0 = *pop0, op1 = *pop1;
8809 enum machine_mode op_mode = GET_MODE (op0);
8810 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8812 /* All of the unordered compare instructions only work on registers.
8813 The same is true of the XFmode compare instructions. The same is
8814 true of the fcomi compare instructions. */
8817 && (fpcmp_mode == CCFPUmode
8818 || op_mode == XFmode
8819 || ix86_use_fcomi_compare (code)))
8821 op0 = force_reg (op_mode, op0);
8822 op1 = force_reg (op_mode, op1);
8826 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8827 things around if they appear profitable, otherwise force op0
/* (comment continues on elided lines 8828-8829: "...into a register").  */
8830 if (standard_80387_constant_p (op0) == 0
8831 || (GET_CODE (op0) == MEM
8832 && ! (standard_80387_constant_p (op1) == 0
8833 || GET_CODE (op1) == MEM)))
8836 tmp = op0, op0 = op1, op1 = tmp;
8837 code = swap_condition (code);
8840 if (GET_CODE (op0) != REG)
8841 op0 = force_reg (op_mode, op0);
8843 if (CONSTANT_P (op1))
/* Standard 387 constants (0.0/1.0) can be loaded directly; everything
   else goes to the constant pool.  */
8845 if (standard_80387_constant_p (op1))
8846 op1 = force_reg (op_mode, op1);
8848 op1 = validize_mem (force_const_mem (op_mode, op1));
8852 /* Try to rearrange the comparison to make it cheaper. */
8853 if (ix86_fp_comparison_cost (code)
8854 > ix86_fp_comparison_cost (swap_condition (code))
8855 && (GET_CODE (op1) == REG || !no_new_pseudos))
8858 tmp = op0, op0 = op1, op1 = tmp;
8859 code = swap_condition (code);
8860 if (GET_CODE (op0) != REG)
8861 op0 = force_reg (op_mode, op0);
8869 /* Convert comparison codes we use to represent FP comparison to integer
8870 code that will result in proper branch. Return UNKNOWN if no such code
/* (comment continuation and the entire switch body, lines 8871 and
   8874-8901, are elided in this sampled listing).  */
8872 static enum rtx_code
8873 ix86_fp_compare_code_to_integer (enum rtx_code code)
8902 /* Split comparison code CODE into comparisons we can do using branch
8903 instructions. BYPASS_CODE is comparison code for branch that will
8904 branch around FIRST_CODE and SECOND_CODE. If some of branches
8905 is not required, set value to NIL.
8906 We never require more than two branches. */
/* NOTE(review): sampled listing -- the initial NIL assignments, the
   *first_code assignments in each case, and the non-IEEE fallback at the
   bottom are elided; the visible lines show which codes need a bypass
   or second branch for NaN handling.  */
8908 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8909 enum rtx_code *first_code,
8910 enum rtx_code *second_code)
8916 /* The fcomi comparison sets flags as follows:
/* (flag table, lines 8917-8925, elided).  */
8926 case GT: /* GTU - CF=0 & ZF=0 */
8927 case GE: /* GEU - CF=0 */
8928 case ORDERED: /* PF=0 */
8929 case UNORDERED: /* PF=1 */
8930 case UNEQ: /* EQ - ZF=1 */
8931 case UNLT: /* LTU - CF=1 */
8932 case UNLE: /* LEU - CF=1 | ZF=1 */
8933 case LTGT: /* EQ - ZF=0 */
8935 case LT: /* LTU - CF=1 - fails on unordered */
8937 *bypass_code = UNORDERED;
8939 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8941 *bypass_code = UNORDERED;
8943 case EQ: /* EQ - ZF=1 - fails on unordered */
8945 *bypass_code = UNORDERED;
8947 case NE: /* NE - ZF=0 - fails on unordered */
8949 *second_code = UNORDERED;
8951 case UNGE: /* GEU - CF=0 - fails on unordered */
8953 *second_code = UNORDERED;
8955 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8957 *second_code = UNORDERED;
/* Without IEEE FP semantics the NaN bypass/second branches are dropped. */
8962 if (!TARGET_IEEE_FP)
8969 /* Return cost of comparison done fcom + arithmetics operations on AX.
8970 All following functions do use number of instructions as a cost metrics.
8971 In future this should be tweaked to compute bytes for optimize_size and
8972 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the per-code switch returning instruction counts (lines
   8979-9005) is elided in this sampled listing.  */
8974 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8976 if (!TARGET_IEEE_FP)
8978 /* The cost of code output by ix86_expand_fp_compare. */
9006 /* Return cost of comparison done using fcomi operation.
9007 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Base cost 2 (fcomi + branch), plus 1 if a NaN bypass or second branch
   is needed.  The TARGET_CMOVE gate returning the prohibitive cost
   (lines 9014-9015) is elided.  */
9009 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9011 enum rtx_code bypass_code, first_code, second_code;
9012 /* Return arbitrarily high cost when instruction is not supported - this
9013 prevents gcc from using it. */
9016 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9017 return (bypass_code != NIL || second_code != NIL) + 2;
9020 /* Return cost of comparison done using sahf operation.
9021 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Base cost 3 (fnstsw + sahf + branch), plus 1 for a NaN bypass or
   second branch.  The prohibitive-cost return body (line 9029) is
   elided.  */
9023 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9025 enum rtx_code bypass_code, first_code, second_code;
9026 /* Return arbitrarily high cost when instruction is not preferred - this
9027 avoids gcc from using it. */
9028 if (!TARGET_USE_SAHF && !optimize_size)
9030 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9031 return (bypass_code != NIL || second_code != NIL) + 3;
9034 /* Compute cost of the comparison done using any method.
9035 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Minimum of the arithmetics, sahf, and fcomi strategies; the `min`
   declaration, the min-updating assignments, and the return (lines
   9040-9041, 9047, 9049-9052) are elided in this sampled listing.  */
9037 ix86_fp_comparison_cost (enum rtx_code code)
9039 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9042 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9043 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9045 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9046 if (min > sahf_cost)
9048 if (min > fcomi_cost)
9053 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the FP comparison and returns the RTX comparison to be put into
   the flags user (bcc/scc/cmov).  May set *SECOND_TEST / *BYPASS_TEST to
   extra tests the caller must combine in.
   NOTE(review): elided excerpt -- many branches (fcomi vs. sahf vs.
   fnstsw+arith selection, several else-arms) are missing lines here.  */
9056 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9057 rtx *second_test, rtx *bypass_test)
9059 enum machine_mode fpcmp_mode, intcmp_mode;
9061 int cost = ix86_fp_comparison_cost (code);
9062 enum rtx_code bypass_code, first_code, second_code;
9064 fpcmp_mode = ix86_fp_compare_mode (code);
9065 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9068 *second_test = NULL_RTX;
9070 *bypass_test = NULL_RTX;
9072 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9074 /* Do fcomi/sahf based test when profitable. */
9075 if ((bypass_code == NIL || bypass_test)
9076 && (second_code == NIL || second_test)
9077 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9081 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9082 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: store the FPU status word into SCRATCH, then sahf it into
   the CPU flags.  */
9088 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9089 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9091 scratch = gen_reg_rtx (HImode);
9092 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9093 emit_insn (gen_x86_sahf_1 (scratch));
9096 /* The FP codes work out to act like unsigned. */
9097 intcmp_mode = fpcmp_mode;
9099 if (bypass_code != NIL)
9100 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9101 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9103 if (second_code != NIL)
9104 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9105 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9110 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9111 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9112 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9114 scratch = gen_reg_rtx (HImode);
9115 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9117 /* In the unordered case, we have to check C2 for NaN's, which
9118 doesn't happen to work out to anything nice combination-wise.
9119 So do some bit twiddling on the value we've got in AH to come
9120 up with an appropriate set of condition codes. */
/* NOTE(review): the 0x45/0x44/0x40/0x05/0x04/0x01 masks below presumably
   select the C0/C2/C3 condition bits of the FPU status word as seen in
   AH -- confirm against the x87 status-word layout.  */
9122 intcmp_mode = CCNOmode;
9127 if (code == GT || !TARGET_IEEE_FP)
9129 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9134 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9135 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9136 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9137 intcmp_mode = CCmode;
9143 if (code == LT && TARGET_IEEE_FP)
9145 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9146 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9147 intcmp_mode = CCmode;
9152 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9158 if (code == GE || !TARGET_IEEE_FP)
9160 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9165 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9166 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9173 if (code == LE && TARGET_IEEE_FP)
9175 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9176 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9177 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9178 intcmp_mode = CCmode;
9183 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9189 if (code == EQ && TARGET_IEEE_FP)
9191 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9192 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9193 intcmp_mode = CCmode;
9198 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9205 if (code == NE && TARGET_IEEE_FP)
9207 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9208 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9214 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9220 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9224 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9233 /* Return the test that should be put into the flags user, i.e.
9234 the bcc, scc, or cmov instruction. */
9235 return gen_rtx_fmt_ee (code, VOIDmode,
9236 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison held in ix86_compare_op0/op1 and return the RTX
   test to feed the flags user.  Dispatches to the FP or integer expander.
   NOTE(review): elided excerpt -- return type, declarations of op0/op1/ret,
   braces and the final return are not visible here.  */
9241 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9244 op0 = ix86_compare_op0;
9245 op1 = ix86_compare_op1;
9248 *second_test = NULL_RTX;
9250 *bypass_test = NULL_RTX;
9252 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9253 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9254 second_test, bypass_test)
9256 ret = ix86_expand_int_compare (code, op0, op1);
9261 /* Return true if the CODE will result in nontrivial jump sequence. */
/* Nontrivial == the FP condition needs a bypass or second jump in
   addition to the primary one.  */
9263 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9265 enum rtx_code bypass_code, first_code, second_code;
9268 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9269 return bypass_code != NIL || second_code != NIL;
/* Emit a conditional branch to LABEL for condition CODE on
   ix86_compare_op0/op1, dispatching on the operands' machine mode.
   NOTE(review): elided excerpt -- the switch case labels, several braces
   and else-arms are missing lines here.  */
9273 ix86_expand_branch (enum rtx_code code, rtx label)
9277 switch (GET_MODE (ix86_compare_op0))
/* Integer modes: straightforward compare + conditional jump.  */
9283 tmp = ix86_expand_compare (code, NULL, NULL);
9284 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9285 gen_rtx_LABEL_REF (VOIDmode, label),
9287 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* FP modes.  */
9296 enum rtx_code bypass_code, first_code, second_code;
9298 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9301 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9303 /* Check whether we will use the natural sequence with one jump. If
9304 so, we can expand jump early. Otherwise delay expansion by
9305 creating compound insn to not confuse optimizers. */
9306 if (bypass_code == NIL && second_code == NIL
9309 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9310 gen_rtx_LABEL_REF (VOIDmode, label),
/* Delayed expansion: wrap the branch and clobbers in one PARALLEL.  */
9315 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9316 ix86_compare_op0, ix86_compare_op1);
9317 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9318 gen_rtx_LABEL_REF (VOIDmode, label),
9320 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9322 use_fcomi = ix86_use_fcomi_compare (code);
9323 vec = rtvec_alloc (3 + !use_fcomi);
9324 RTVEC_ELT (vec, 0) = tmp;
/* Clobber both FP flag registers; a scratch HImode reg is also clobbered
   when fnstsw (not fcomi) will be used.  */
9326 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9328 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9331 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9333 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9341 /* Expand DImode branch into multiple compare+branch. */
9343 rtx lo[2], hi[2], label2;
9344 enum rtx_code code1, code2, code3;
/* Canonicalize: constant goes second.  */
9346 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9348 tmp = ix86_compare_op0;
9349 ix86_compare_op0 = ix86_compare_op1;
9350 ix86_compare_op1 = tmp;
9351 code = swap_condition (code);
9353 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9354 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9356 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9357 avoid two branches. This costs one extra insn, so disable when
9358 optimizing for size. */
9360 if ((code == EQ || code == NE)
9362 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9367 if (hi[1] != const0_rtx)
9368 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9369 NULL_RTX, 0, OPTAB_WIDEN);
9372 if (lo[1] != const0_rtx)
9373 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9374 NULL_RTX, 0, OPTAB_WIDEN);
9376 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9377 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: compare the OR-ed result against zero.  */
9379 ix86_compare_op0 = tmp;
9380 ix86_compare_op1 = const0_rtx;
9381 ix86_expand_branch (code, label);
9385 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9386 op1 is a constant and the low word is zero, then we can just
9387 examine the high word. */
9389 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9392 case LT: case LTU: case GE: case GEU:
9393 ix86_compare_op0 = hi[0];
9394 ix86_compare_op1 = hi[1];
9395 ix86_expand_branch (code, label);
9401 /* Otherwise, we need two or three jumps. */
9403 label2 = gen_label_rtx ();
9406 code2 = swap_condition (code);
9407 code3 = unsigned_condition (code);
9411 case LT: case GT: case LTU: case GTU:
9414 case LE: code1 = LT; code2 = GT; break;
9415 case GE: code1 = GT; code2 = LT; break;
9416 case LEU: code1 = LTU; code2 = GTU; break;
9417 case GEU: code1 = GTU; code2 = LTU; break;
/* NIL here means "skip that jump entirely".  */
9419 case EQ: code1 = NIL; code2 = NE; break;
9420 case NE: code2 = NIL; break;
9428 * if (hi(a) < hi(b)) goto true;
9429 * if (hi(a) > hi(b)) goto false;
9430 * if (lo(a) < lo(b)) goto true;
9434 ix86_compare_op0 = hi[0];
9435 ix86_compare_op1 = hi[1];
9438 ix86_expand_branch (code1, label);
9440 ix86_expand_branch (code2, label2);
9442 ix86_compare_op0 = lo[0];
9443 ix86_compare_op1 = lo[1];
9444 ix86_expand_branch (code3, label);
9447 emit_label (label2);
9456 /* Split branch based on floating point condition. */
/* Emits up to three conditional jumps (bypass for unordered, main,
   second) with branch-probability notes attached.
   NOTE(review): elided excerpt -- declarations of condition/second/bypass/i,
   several braces and SET operands are missing lines here.  */
9458 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9459 rtx target1, rtx target2, rtx tmp)
9462 rtx label = NULL_RTX;
9464 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the "taken" target is TARGET1.  */
9467 if (target2 != pc_rtx)
9470 code = reverse_condition_maybe_unordered (code);
9475 condition = ix86_expand_fp_compare (code, op1, op2,
9476 tmp, &second, &bypass);
9478 if (split_branch_probability >= 0)
9480 /* Distribute the probabilities across the jumps.
9481 Assume the BYPASS and SECOND to be always test
9483 probability = split_branch_probability;
9485 /* Value of 1 is low enough to make no need for probability
9486 to be updated. Later we may run some experiments and see
9487 if unordered values are more frequent in practice. */
9489 bypass_probability = 1;
9491 second_probability = 1;
/* Bypass jump: skips the main jump when the compare is unordered.  */
9493 if (bypass != NULL_RTX)
9495 label = gen_label_rtx ();
9496 i = emit_jump_insn (gen_rtx_SET
9498 gen_rtx_IF_THEN_ELSE (VOIDmode,
9500 gen_rtx_LABEL_REF (VOIDmode,
9503 if (bypass_probability >= 0)
9505 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9506 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9509 i = emit_jump_insn (gen_rtx_SET
9511 gen_rtx_IF_THEN_ELSE (VOIDmode,
9512 condition, target1, target2)));
9513 if (probability >= 0)
9515 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9516 GEN_INT (probability),
/* Optional second jump needed when one test is not enough.  */
9518 if (second != NULL_RTX)
9520 i = emit_jump_insn (gen_rtx_SET
9522 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9524 if (second_probability >= 0)
9526 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9527 GEN_INT (second_probability),
9530 if (label != NULL_RTX)
/* Expand a setcc of condition CODE on ix86_compare_op0/op1 into QImode
   register DEST.  Returns 1 on success, 0 on failure (caller falls back).
   NOTE(review): elided excerpt -- braces, the DImode guard's second
   conjunct and the tmp/tmpreg setup lines are missing here.  */
9535 ix86_expand_setcc (enum rtx_code code, rtx dest)
9537 rtx ret, tmp, tmpreg, equiv;
9538 rtx second_test, bypass_test;
9540 if (GET_MODE (ix86_compare_op0) == DImode
9542 return 0; /* FAIL */
9544 if (GET_MODE (dest) != QImode)
9547 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9548 PUT_MODE (ret, QImode);
9553 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Combine the extra bypass/second test into the result with and/or.  */
9554 if (bypass_test || second_test)
9556 rtx test = second_test;
9558 rtx tmp2 = gen_reg_rtx (QImode);
9565 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9567 PUT_MODE (test, QImode);
9568 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9571 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9573 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9576 /* Attach a REG_EQUAL note describing the comparison result. */
9577 equiv = simplify_gen_relational (code, QImode,
9578 GET_MODE (ix86_compare_op0),
9579 ix86_compare_op0, ix86_compare_op1);
9580 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9582 return 1; /* DONE */
9585 /* Expand comparison setting or clearing carry flag. Return true when
9586 successful and set pop for the operation. */
/* NOTE(review): elided excerpt -- braces, several return statements and
   switch labels over CODE are missing lines here.  */
9588 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9590 enum machine_mode mode =
9591 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9593 /* Do not handle DImode compares that go through a special path. Also we can't
9594 deal with FP compares yet. This is possible to add. */
9595 if ((mode == DImode && !TARGET_64BIT))
9597 if (FLOAT_MODE_P (mode))
9599 rtx second_test = NULL, bypass_test = NULL;
9600 rtx compare_op, compare_seq;
9602 /* Shortcut: following common codes never translate into carry flag compares. */
9603 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9604 || code == ORDERED || code == UNORDERED)
9607 /* These comparisons require zero flag; swap operands so they won't. */
9608 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9614 code = swap_condition (code);
9617 /* Try to expand the comparison and verify that we end up with carry flag
9618 based comparison. This fails to be true only when we decide to expand
9619 comparison using arithmetic that is not too common scenario. */
9621 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9622 &second_test, &bypass_test);
9623 compare_seq = get_insns ();
9626 if (second_test || bypass_test)
9628 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9629 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9630 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9632 code = GET_CODE (compare_op);
/* Only LTU/GEU are carry-flag comparisons; anything else fails.  */
9633 if (code != LTU && code != GEU)
9635 emit_insn (compare_seq);
9639 if (!INTEGRAL_MODE_P (mode))
9647 /* Convert a==0 into (unsigned)a<1. */
9650 if (op1 != const0_rtx)
9653 code = (code == EQ ? LTU : GEU);
9656 /* Convert a>b into b<a or a>=b-1. */
9659 if (GET_CODE (op1) == CONST_INT)
9661 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9662 /* Bail out on overflow. We still can swap operands but that
9663 would force loading of the constant into register. */
9664 if (op1 == const0_rtx
9665 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9667 code = (code == GTU ? GEU : LTU);
9674 code = (code == GTU ? LTU : GEU);
9678 /* Convert a>=0 into (unsigned)a<0x80000000. */
9681 if (mode == DImode || op1 != const0_rtx)
/* NOTE(review): `1 << (BITSIZE - 1)' shifts into the sign bit of int for
   32-bit modes, which is undefined behavior in ISO C -- should presumably
   be `(HOST_WIDE_INT) 1 << ...'; verify against upstream.  */
9683 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9684 code = (code == LT ? GEU : LTU);
9688 if (mode == DImode || op1 != constm1_rtx)
9690 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9691 code = (code == LE ? GEU : LTU);
9697 /* Swapping operands may cause constant to appear as first operand. */
9698 if (!nonimmediate_operand (op0, VOIDmode))
9702 op0 = force_reg (mode, op0);
9704 ix86_compare_op0 = op0;
9705 ix86_compare_op1 = op1;
9706 *pop = ix86_expand_compare (code, NULL, NULL);
9707 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move: operands[0] = operands[1](cond)
   ? operands[2] : operands[3].  Returns 1 (DONE) or 0 (FAIL).
   NOTE(review): elided excerpt -- many braces, else-arms and several
   assignments (diff, var, tmpreg, ...) are missing lines throughout.  */
9713 ix86_expand_int_movcc (rtx operands[])
9715 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9716 rtx compare_seq, compare_op;
9717 rtx second_test, bypass_test;
9718 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon below -- harmless but should be
   cleaned up.  */
9719 bool sign_bit_compare_p = false;;
9722 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9723 compare_seq = get_insns ();
9726 compare_code = GET_CODE (compare_op);
/* Compares against 0/-1 with GE/LT (GT/LE) test only the sign bit.  */
9728 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9729 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9730 sign_bit_compare_p = true;
9732 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9733 HImode insns, we'd be swallowed in word prefix ops. */
9735 if ((mode != HImode || TARGET_FAST_PREFIX)
9736 && (mode != DImode || TARGET_64BIT)
9737 && GET_CODE (operands[2]) == CONST_INT
9738 && GET_CODE (operands[3]) == CONST_INT)
9740 rtx out = operands[0];
9741 HOST_WIDE_INT ct = INTVAL (operands[2]);
9742 HOST_WIDE_INT cf = INTVAL (operands[3]);
9746 /* Sign bit compares are better done using shifts than we do by using
9748 if (sign_bit_compare_p
9749 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9750 ix86_compare_op1, &compare_op))
9752 /* Detect overlap between destination and compare sources. */
9755 if (!sign_bit_compare_p)
9759 compare_code = GET_CODE (compare_op);
9761 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9762 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9765 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9768 /* To simplify rest of code, restrict to the GEU case. */
9769 if (compare_code == LTU)
9771 HOST_WIDE_INT tmp = ct;
9774 compare_code = reverse_condition (compare_code);
9775 code = reverse_condition (code);
9780 PUT_CODE (compare_op,
9781 reverse_condition_maybe_unordered
9782 (GET_CODE (compare_op)));
9784 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9788 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9789 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9790 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag via sbb.  */
9793 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9795 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9799 if (code == GT || code == GE)
9800 code = reverse_condition (code);
9803 HOST_WIDE_INT tmp = ct;
9808 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9809 ix86_compare_op1, VOIDmode, 0, -1);
9822 tmp = expand_simple_binop (mode, PLUS,
9824 copy_rtx (tmp), 1, OPTAB_DIRECT);
9835 tmp = expand_simple_binop (mode, IOR,
9837 copy_rtx (tmp), 1, OPTAB_DIRECT);
9839 else if (diff == -1 && ct)
9849 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9851 tmp = expand_simple_binop (mode, PLUS,
9852 copy_rtx (tmp), GEN_INT (cf),
9853 copy_rtx (tmp), 1, OPTAB_DIRECT);
9861 * andl cf - ct, dest
9871 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9874 tmp = expand_simple_binop (mode, AND,
9876 gen_int_mode (cf - ct, mode),
9877 copy_rtx (tmp), 1, OPTAB_DIRECT);
9879 tmp = expand_simple_binop (mode, PLUS,
9880 copy_rtx (tmp), GEN_INT (ct),
9881 copy_rtx (tmp), 1, OPTAB_DIRECT);
9884 if (!rtx_equal_p (tmp, out))
9885 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9887 return 1; /* DONE */
/* Swap the constants and reverse the condition.  */
9893 tmp = ct, ct = cf, cf = tmp;
9895 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9897 /* We may be reversing unordered compare to normal compare, that
9898 is not valid in general (we may convert non-trapping condition
9899 to trapping one), however on i386 we currently emit all
9900 comparisons unordered. */
9901 compare_code = reverse_condition_maybe_unordered (compare_code);
9902 code = reverse_condition_maybe_unordered (code);
9906 compare_code = reverse_condition (compare_code);
9907 code = reverse_condition (code);
9912 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9913 && GET_CODE (ix86_compare_op1) == CONST_INT)
9915 if (ix86_compare_op1 == const0_rtx
9916 && (code == LT || code == GE))
9917 compare_code = code;
9918 else if (ix86_compare_op1 == constm1_rtx)
9922 else if (code == GT)
9927 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9928 if (compare_code != NIL
9929 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9930 && (cf == -1 || ct == -1))
9932 /* If lea code below could be used, only optimize
9933 if it results in a 2 insn sequence. */
9935 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9936 || diff == 3 || diff == 5 || diff == 9)
9937 || (compare_code == LT && ct == -1)
9938 || (compare_code == GE && cf == -1))
9941 * notl op1 (if necessary)
9949 code = reverse_condition (code);
9952 out = emit_store_flag (out, code, ix86_compare_op0,
9953 ix86_compare_op1, VOIDmode, 0, -1);
9955 out = expand_simple_binop (mode, IOR,
9957 out, 1, OPTAB_DIRECT);
9958 if (out != operands[0])
9959 emit_move_insn (operands[0], out);
9961 return 1; /* DONE */
/* LEA path: diff fits a scale/index combination.  */
9966 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9967 || diff == 3 || diff == 5 || diff == 9)
9968 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9969 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9975 * lea cf(dest*(ct-cf)),dest
9979 * This also catches the degenerate setcc-only case.
9985 out = emit_store_flag (out, code, ix86_compare_op0,
9986 ix86_compare_op1, VOIDmode, 0, 1);
9989 /* On x86_64 the lea instruction operates on Pmode, so we need
9990 to get arithmetics done in proper mode to match. */
9992 tmp = copy_rtx (out);
9996 out1 = copy_rtx (out);
9997 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10001 tmp = gen_rtx_PLUS (mode, tmp, out1);
10007 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10010 if (!rtx_equal_p (tmp, out))
10013 out = force_operand (tmp, copy_rtx (out));
10015 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10017 if (!rtx_equal_p (out, operands[0]))
10018 emit_move_insn (operands[0], copy_rtx (out));
10020 return 1; /* DONE */
10024 * General case: Jumpful:
10025 * xorl dest,dest cmpl op1, op2
10026 * cmpl op1, op2 movl ct, dest
10027 * setcc dest jcc 1f
10028 * decl dest movl cf, dest
10029 * andl (cf-ct),dest 1:
10032 * Size 20. Size 14.
10034 * This is reasonably steep, but branch mispredict costs are
10035 * high on modern cpus, so consider failing only if optimizing
10039 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10040 && BRANCH_COST >= 2)
10046 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10047 /* We may be reversing unordered compare to normal compare,
10048 that is not valid in general (we may convert non-trapping
10049 condition to trapping one), however on i386 we currently
10050 emit all comparisons unordered. */
10051 code = reverse_condition_maybe_unordered (code);
10054 code = reverse_condition (code);
10055 if (compare_code != NIL)
10056 compare_code = reverse_condition (compare_code);
10060 if (compare_code != NIL)
10062 /* notl op1 (if needed)
10067 For x < 0 (resp. x <= -1) there will be no notl,
10068 so if possible swap the constants to get rid of the
10070 True/false will be -1/0 while code below (store flag
10071 followed by decrement) is 0/-1, so the constants need
10072 to be exchanged once more. */
10074 if (compare_code == GE || !cf)
10076 code = reverse_condition (code);
10081 HOST_WIDE_INT tmp = cf;
10086 out = emit_store_flag (out, code, ix86_compare_op0,
10087 ix86_compare_op1, VOIDmode, 0, -1);
10091 out = emit_store_flag (out, code, ix86_compare_op0,
10092 ix86_compare_op1, VOIDmode, 0, 1);
10094 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10095 copy_rtx (out), 1, OPTAB_DIRECT);
10098 out = expand_simple_binop (mode, AND, copy_rtx (out),
10099 gen_int_mode (cf - ct, mode),
10100 copy_rtx (out), 1, OPTAB_DIRECT);
10102 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10103 copy_rtx (out), 1, OPTAB_DIRECT);
10104 if (!rtx_equal_p (out, operands[0]))
10105 emit_move_insn (operands[0], copy_rtx (out));
10107 return 1; /* DONE */
10111 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10113 /* Try a few things more with specific constants and a variable. */
10116 rtx var, orig_out, out, tmp;
10118 if (BRANCH_COST <= 2)
10119 return 0; /* FAIL */
10121 /* If one of the two operands is an interesting constant, load a
10122 constant with the above and mask it in with a logical operation. */
10124 if (GET_CODE (operands[2]) == CONST_INT)
10127 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10128 operands[3] = constm1_rtx, op = and_optab;
10129 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10130 operands[3] = const0_rtx, op = ior_optab;
10132 return 0; /* FAIL */
10134 else if (GET_CODE (operands[3]) == CONST_INT)
10137 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10138 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard below checks operands[3] twice; the symmetric
   branch above checks the *other* operand, so this likely should read
   `operands[2] != const0_rtx' -- verify against upstream.  */
10139 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10140 operands[2] = const0_rtx, op = ior_optab;
10142 return 0; /* FAIL */
10145 return 0; /* FAIL */
10147 orig_out = operands[0];
10148 tmp = gen_reg_rtx (mode);
10151 /* Recurse to get the constant loaded. */
10152 if (ix86_expand_int_movcc (operands) == 0)
10153 return 0; /* FAIL */
10155 /* Mask in the interesting variable. */
10156 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10158 if (!rtx_equal_p (out, orig_out))
10159 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10161 return 1; /* DONE */
10165 * For comparison with above,
/* cmov path: both arms must be register/memory operands.  */
10175 if (! nonimmediate_operand (operands[2], mode))
10176 operands[2] = force_reg (mode, operands[2]);
10177 if (! nonimmediate_operand (operands[3], mode))
10178 operands[3] = force_reg (mode, operands[3]);
10180 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10182 rtx tmp = gen_reg_rtx (mode);
10183 emit_move_insn (tmp, operands[3]);
10186 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10188 rtx tmp = gen_reg_rtx (mode);
10189 emit_move_insn (tmp, operands[2]);
10193 if (! register_operand (operands[2], VOIDmode)
10195 || ! register_operand (operands[3], VOIDmode)))
10196 operands[2] = force_reg (mode, operands[2]);
10199 && ! register_operand (operands[3], VOIDmode))
10200 operands[3] = force_reg (mode, operands[3]);
10202 emit_insn (compare_seq);
10203 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10204 gen_rtx_IF_THEN_ELSE (mode,
10205 compare_op, operands[2],
/* Extra cmovs for the bypass/second tests, if any.  */
10208 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10209 gen_rtx_IF_THEN_ELSE (mode,
10211 copy_rtx (operands[3]),
10212 copy_rtx (operands[0]))));
10214 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10215 gen_rtx_IF_THEN_ELSE (mode,
10217 copy_rtx (operands[2]),
10218 copy_rtx (operands[0]))));
10220 return 1; /* DONE */
/* Expand a floating-point conditional move.  Tries SSE min/max and SSE
   cmov first, then falls back to fcmov.
   NOTE(review): elided excerpt -- braces, some else-arms and return
   statements are missing lines here.  */
10224 ix86_expand_fp_movcc (rtx operands[])
10226 enum rtx_code code;
10228 rtx compare_op, second_test, bypass_test;
10230 /* For SF/DFmode conditional moves based on comparisons
10231 in same mode, we may want to use SSE min/max instructions. */
10232 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10233 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10234 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10235 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10236 && (!TARGET_IEEE_FP
10237 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10238 /* We may be called from the post-reload splitter. */
10239 && (!REG_P (operands[0])
10240 || SSE_REG_P (operands[0])
10241 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10243 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10244 code = GET_CODE (operands[1]);
10246 /* See if we have (cross) match between comparison operands and
10247 conditional move operands. */
10248 if (rtx_equal_p (operands[2], op1))
10253 code = reverse_condition_maybe_unordered (code);
10255 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10257 /* Check for min operation. */
10258 if (code == LT || code == UNLE)
10266 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10267 if (memory_operand (op0, VOIDmode))
10268 op0 = force_reg (GET_MODE (operands[0]), op0);
10269 if (GET_MODE (operands[0]) == SFmode)
10270 emit_insn (gen_minsf3 (operands[0], op0, op1));
10272 emit_insn (gen_mindf3 (operands[0], op0, op1));
10275 /* Check for max operation. */
10276 if (code == GT || code == UNGE)
10284 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10285 if (memory_operand (op0, VOIDmode))
10286 op0 = force_reg (GET_MODE (operands[0]), op0);
10287 if (GET_MODE (operands[0]) == SFmode)
10288 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10290 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10294 /* Manage condition to be sse_comparison_operator. In case we are
10295 in non-ieee mode, try to canonicalize the destination operand
10296 to be first in the comparison - this helps reload to avoid extra
10298 if (!sse_comparison_operator (operands[1], VOIDmode)
10299 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10301 rtx tmp = ix86_compare_op0;
10302 ix86_compare_op0 = ix86_compare_op1;
10303 ix86_compare_op1 = tmp;
10304 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10305 VOIDmode, ix86_compare_op0,
10308 /* Similarly try to manage result to be first operand of conditional
10309 move. We also don't support the NE comparison on SSE, so try to
10311 if ((rtx_equal_p (operands[0], operands[3])
10312 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10313 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10315 rtx tmp = operands[2];
10316 operands[2] = operands[3];
10318 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10319 (GET_CODE (operands[1])),
10320 VOIDmode, ix86_compare_op0,
10323 if (GET_MODE (operands[0]) == SFmode)
10324 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10325 operands[2], operands[3],
10326 ix86_compare_op0, ix86_compare_op1));
10328 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10329 operands[2], operands[3],
10330 ix86_compare_op0, ix86_compare_op1));
10334 /* The floating point conditional move instructions don't directly
10335 support conditions resulting from a signed integer comparison. */
10337 code = GET_CODE (operands[1]);
10338 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10340 /* The floating point conditional move instructions don't directly
10341 support signed integer comparisons. */
10343 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10345 if (second_test != NULL || bypass_test != NULL)
/* Flatten the condition into a QImode 0/1 value and recompare.  */
10347 tmp = gen_reg_rtx (QImode);
10348 ix86_expand_setcc (code, tmp);
10350 ix86_compare_op0 = tmp;
10351 ix86_compare_op1 = const0_rtx;
10352 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10354 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10356 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10357 emit_move_insn (tmp, operands[3]);
10360 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10362 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10363 emit_move_insn (tmp, operands[2]);
10367 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10368 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
/* Extra fcmovs for the bypass/second tests, if any.  */
10373 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10374 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10379 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10380 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10388 /* Expand conditional increment or decrement using adc/sbb instructions.
10389 The default case using setcc followed by the conditional move can be
10390 done by generic code. */
/* NOTE(review): elided excerpt -- declaration of compare_op, braces,
   switch case labels and the FAIL returns are missing lines here.  */
10392 ix86_expand_int_addcc (rtx operands[])
10394 enum rtx_code code = GET_CODE (operands[1]);
10396 rtx val = const0_rtx;
10397 bool fpcmp = false;
10398 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 adjustments can be done with adc/sbb.  */
10400 if (operands[3] != const1_rtx
10401 && operands[3] != constm1_rtx)
10403 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10404 ix86_compare_op1, &compare_op))
10406 code = GET_CODE (compare_op);
10408 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10409 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10412 code = ix86_fp_compare_code_to_integer (code);
10419 PUT_CODE (compare_op,
10420 reverse_condition_maybe_unordered
10421 (GET_CODE (compare_op)));
10423 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10425 PUT_MODE (compare_op, mode);
10427 /* Construct either adc or sbb insn. */
10428 if ((code == LTU) == (operands[3] == constm1_rtx))
10430 switch (GET_MODE (operands[0]))
10433 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10436 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10439 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10442 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10450 switch (GET_MODE (operands[0]))
10453 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10456 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10459 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10462 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10468 return 1; /* DONE */
10472 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10473 works for floating pointer parameters and nonoffsetable memories.
10474 For pushes, it returns just stack offsets; the values will be saved
10475 in the right order. Maximally three parts are generated. */
10478 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10483 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10485 size = (GET_MODE_SIZE (mode) + 4) / 8;
10487 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10489 if (size < 2 || size > 3)
10492 /* Optimize constant pool reference to immediates. This is used by fp
10493 moves, that force all constants to memory to allow combining. */
10494 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10496 rtx tmp = maybe_get_pool_constant (operand);
10501 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10503 /* The only non-offsetable memories we handle are pushes. */
10504 if (! push_operand (operand, VOIDmode))
10507 operand = copy_rtx (operand);
10508 PUT_MODE (operand, Pmode);
10509 parts[0] = parts[1] = parts[2] = operand;
10511 else if (!TARGET_64BIT)
10513 if (mode == DImode)
10514 split_di (&operand, 1, &parts[0], &parts[1]);
10517 if (REG_P (operand))
10519 if (!reload_completed)
10521 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10522 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10524 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10526 else if (offsettable_memref_p (operand))
10528 operand = adjust_address (operand, SImode, 0);
10529 parts[0] = operand;
10530 parts[1] = adjust_address (operand, SImode, 4);
10532 parts[2] = adjust_address (operand, SImode, 8);
10534 else if (GET_CODE (operand) == CONST_DOUBLE)
10539 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10543 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10544 parts[2] = gen_int_mode (l[2], SImode);
10547 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10552 parts[1] = gen_int_mode (l[1], SImode);
10553 parts[0] = gen_int_mode (l[0], SImode);
10561 if (mode == TImode)
10562 split_ti (&operand, 1, &parts[0], &parts[1]);
10563 if (mode == XFmode || mode == TFmode)
10565 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10566 if (REG_P (operand))
10568 if (!reload_completed)
10570 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10571 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10573 else if (offsettable_memref_p (operand))
10575 operand = adjust_address (operand, DImode, 0);
10576 parts[0] = operand;
10577 parts[1] = adjust_address (operand, upper_mode, 8);
10579 else if (GET_CODE (operand) == CONST_DOUBLE)
10584 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10585 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10586 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10587 if (HOST_BITS_PER_WIDE_INT >= 64)
10590 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10591 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10594 parts[0] = immed_double_const (l[0], l[1], DImode);
10595 if (upper_mode == SImode)
10596 parts[1] = gen_int_mode (l[2], SImode);
10597 else if (HOST_BITS_PER_WIDE_INT >= 64)
10600 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10601 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10604 parts[1] = immed_double_const (l[2], l[3], DImode);
10614 /* Emit insns to perform a move or push of DI, DF, and XF values.
10615 Return false when normal moves are needed; true when all required
10616 insns have been emitted. Operands 2-4 contain the input values
10617 in the correct order; operands 5-7 contain the output values. */
10620 ix86_split_long_move (rtx operands[])
10625 int collisions = 0;
10626 enum machine_mode mode = GET_MODE (operands[0]);
10628 /* The DFmode expanders may ask us to move double.
10629 For 64bit target this is single move. By hiding the fact
10630 here we simplify i386.md splitters. */
10631 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10633 /* Optimize constant pool reference to immediates. This is used by
10634 fp moves, that force all constants to memory to allow combining. */
10636 if (GET_CODE (operands[1]) == MEM
10637 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10638 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10639 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10640 if (push_operand (operands[0], VOIDmode))
10642 operands[0] = copy_rtx (operands[0]);
10643 PUT_MODE (operands[0], Pmode);
10646 operands[0] = gen_lowpart (DImode, operands[0]);
10647 operands[1] = gen_lowpart (DImode, operands[1]);
10648 emit_move_insn (operands[0], operands[1]);
10652 /* The only non-offsettable memory we handle is push. */
10653 if (push_operand (operands[0], VOIDmode))
10655 else if (GET_CODE (operands[0]) == MEM
10656 && ! offsettable_memref_p (operands[0]))
10659 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10660 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10662 /* When emitting push, take care for source operands on the stack. */
10663 if (push && GET_CODE (operands[1]) == MEM
10664 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10667 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10668 XEXP (part[1][2], 0));
10669 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10670 XEXP (part[1][1], 0));
10673 /* We need to do copy in the right order in case an address register
10674 of the source overlaps the destination. */
10675 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10677 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10679 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10682 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10685 /* Collision in the middle part can be handled by reordering. */
10686 if (collisions == 1 && nparts == 3
10687 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10690 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10691 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10694 /* If there are more collisions, we can't handle it by reordering.
10695 Do an lea to the last part and use only one colliding move. */
10696 else if (collisions > 1)
10702 base = part[0][nparts - 1];
10704 /* Handle the case when the last part isn't valid for lea.
10705 Happens in 64-bit mode storing the 12-byte XFmode. */
10706 if (GET_MODE (base) != Pmode)
10707 base = gen_rtx_REG (Pmode, REGNO (base));
10709 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10710 part[1][0] = replace_equiv_address (part[1][0], base);
10711 part[1][1] = replace_equiv_address (part[1][1],
10712 plus_constant (base, UNITS_PER_WORD));
10714 part[1][2] = replace_equiv_address (part[1][2],
10715 plus_constant (base, 8));
10725 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10726 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10727 emit_move_insn (part[0][2], part[1][2]);
10732 /* In 64bit mode we don't have 32bit push available. In case this is
10733 register, it is OK - we will just use larger counterpart. We also
10734 retype memory - these comes from attempt to avoid REX prefix on
10735 moving of second half of TFmode value. */
10736 if (GET_MODE (part[1][1]) == SImode)
10738 if (GET_CODE (part[1][1]) == MEM)
10739 part[1][1] = adjust_address (part[1][1], DImode, 0);
10740 else if (REG_P (part[1][1]))
10741 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10744 if (GET_MODE (part[1][0]) == SImode)
10745 part[1][0] = part[1][1];
10748 emit_move_insn (part[0][1], part[1][1]);
10749 emit_move_insn (part[0][0], part[1][0]);
10753 /* Choose correct order to not overwrite the source before it is copied. */
10754 if ((REG_P (part[0][0])
10755 && REG_P (part[1][1])
10756 && (REGNO (part[0][0]) == REGNO (part[1][1])
10758 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10760 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10764 operands[2] = part[0][2];
10765 operands[3] = part[0][1];
10766 operands[4] = part[0][0];
10767 operands[5] = part[1][2];
10768 operands[6] = part[1][1];
10769 operands[7] = part[1][0];
10773 operands[2] = part[0][1];
10774 operands[3] = part[0][0];
10775 operands[5] = part[1][1];
10776 operands[6] = part[1][0];
10783 operands[2] = part[0][0];
10784 operands[3] = part[0][1];
10785 operands[4] = part[0][2];
10786 operands[5] = part[1][0];
10787 operands[6] = part[1][1];
10788 operands[7] = part[1][2];
10792 operands[2] = part[0][0];
10793 operands[3] = part[0][1];
10794 operands[5] = part[1][0];
10795 operands[6] = part[1][1];
10798 emit_move_insn (operands[2], operands[5]);
10799 emit_move_insn (operands[3], operands[6]);
10801 emit_move_insn (operands[4], operands[7]);
10807 ix86_split_ashldi (rtx *operands, rtx scratch)
10809 rtx low[2], high[2];
10812 if (GET_CODE (operands[2]) == CONST_INT)
10814 split_di (operands, 2, low, high);
10815 count = INTVAL (operands[2]) & 63;
10819 emit_move_insn (high[0], low[1]);
10820 emit_move_insn (low[0], const0_rtx);
10823 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10827 if (!rtx_equal_p (operands[0], operands[1]))
10828 emit_move_insn (operands[0], operands[1]);
10829 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10830 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10835 if (!rtx_equal_p (operands[0], operands[1]))
10836 emit_move_insn (operands[0], operands[1]);
10838 split_di (operands, 1, low, high);
10840 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10841 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10843 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10845 if (! no_new_pseudos)
10846 scratch = force_reg (SImode, const0_rtx);
10848 emit_move_insn (scratch, const0_rtx);
10850 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10854 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10859 ix86_split_ashrdi (rtx *operands, rtx scratch)
10861 rtx low[2], high[2];
10864 if (GET_CODE (operands[2]) == CONST_INT)
10866 split_di (operands, 2, low, high);
10867 count = INTVAL (operands[2]) & 63;
10871 emit_move_insn (low[0], high[1]);
10873 if (! reload_completed)
10874 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10877 emit_move_insn (high[0], low[0]);
10878 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10882 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10886 if (!rtx_equal_p (operands[0], operands[1]))
10887 emit_move_insn (operands[0], operands[1]);
10888 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10889 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10894 if (!rtx_equal_p (operands[0], operands[1]))
10895 emit_move_insn (operands[0], operands[1]);
10897 split_di (operands, 1, low, high);
10899 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10900 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10902 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10904 if (! no_new_pseudos)
10905 scratch = gen_reg_rtx (SImode);
10906 emit_move_insn (scratch, high[0]);
10907 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10908 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10912 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10917 ix86_split_lshrdi (rtx *operands, rtx scratch)
10919 rtx low[2], high[2];
10922 if (GET_CODE (operands[2]) == CONST_INT)
10924 split_di (operands, 2, low, high);
10925 count = INTVAL (operands[2]) & 63;
10929 emit_move_insn (low[0], high[1]);
10930 emit_move_insn (high[0], const0_rtx);
10933 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10937 if (!rtx_equal_p (operands[0], operands[1]))
10938 emit_move_insn (operands[0], operands[1]);
10939 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10940 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10945 if (!rtx_equal_p (operands[0], operands[1]))
10946 emit_move_insn (operands[0], operands[1]);
10948 split_di (operands, 1, low, high);
10950 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10951 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10953 /* Heh. By reversing the arguments, we can reuse this pattern. */
10954 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10956 if (! no_new_pseudos)
10957 scratch = force_reg (SImode, const0_rtx);
10959 emit_move_insn (scratch, const0_rtx);
10961 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10965 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10969 /* Helper function for the string operations below. Test whether VARIABLE
10970 is aligned to VALUE bytes. If true, jump to the returned label. */
10972 ix86_expand_aligntest (rtx variable, int value)
10974 rtx label = gen_label_rtx ();
10975 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10976 if (GET_MODE (variable) == DImode)
10977 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10979 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10980 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10985 /* Adjust COUNTER by the VALUE. */
10987 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10989 if (GET_MODE (countreg) == DImode)
10990 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10992 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10995 /* Zero extend possibly SImode EXP to Pmode register. */
10997 ix86_zero_extend_to_Pmode (rtx exp)
11000 if (GET_MODE (exp) == VOIDmode)
11001 return force_reg (Pmode, exp);
11002 if (GET_MODE (exp) == Pmode)
11003 return copy_to_mode_reg (Pmode, exp);
11004 r = gen_reg_rtx (Pmode);
11005 emit_insn (gen_zero_extendsidi2 (r, exp));
11009 /* Expand string move (memcpy) operation. Use i386 string operations when
11010 profitable. expand_clrstr contains similar code. */
11012 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11014 rtx srcreg, destreg, countreg, srcexp, destexp;
11015 enum machine_mode counter_mode;
11016 HOST_WIDE_INT align = 0;
11017 unsigned HOST_WIDE_INT count = 0;
11019 if (GET_CODE (align_exp) == CONST_INT)
11020 align = INTVAL (align_exp);
11022 /* Can't use any of this if the user has appropriated esi or edi. */
11023 if (global_regs[4] || global_regs[5])
11026 /* This simple hack avoids all inlining code and simplifies code below. */
11027 if (!TARGET_ALIGN_STRINGOPS)
11030 if (GET_CODE (count_exp) == CONST_INT)
11032 count = INTVAL (count_exp);
11033 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11037 /* Figure out proper mode for counter. For 32bits it is always SImode,
11038 for 64bits use SImode when possible, otherwise DImode.
11039 Set count to number of bytes copied when known at compile time. */
11040 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11041 || x86_64_zero_extended_value (count_exp))
11042 counter_mode = SImode;
11044 counter_mode = DImode;
11046 if (counter_mode != SImode && counter_mode != DImode)
11049 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11050 if (destreg != XEXP (dst, 0))
11051 dst = replace_equiv_address_nv (dst, destreg);
11052 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11053 if (srcreg != XEXP (src, 0))
11054 src = replace_equiv_address_nv (src, srcreg);
11056 /* When optimizing for size emit simple rep ; movsb instruction for
11057 counts not divisible by 4. */
11059 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11061 emit_insn (gen_cld ());
11062 countreg = ix86_zero_extend_to_Pmode (count_exp);
11063 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11064 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11065 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11069 /* For constant aligned (or small unaligned) copies use rep movsl
11070 followed by code copying the rest. For PentiumPro ensure 8 byte
11071 alignment to allow rep movsl acceleration. */
11073 else if (count != 0
11075 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11076 || optimize_size || count < (unsigned int) 64))
11078 unsigned HOST_WIDE_INT offset = 0;
11079 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11080 rtx srcmem, dstmem;
11082 emit_insn (gen_cld ());
11083 if (count & ~(size - 1))
11085 countreg = copy_to_mode_reg (counter_mode,
11086 GEN_INT ((count >> (size == 4 ? 2 : 3))
11087 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11088 countreg = ix86_zero_extend_to_Pmode (countreg);
11090 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11091 GEN_INT (size == 4 ? 2 : 3));
11092 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11093 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11095 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11096 countreg, destexp, srcexp));
11097 offset = count & ~(size - 1);
11099 if (size == 8 && (count & 0x04))
11101 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11103 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11105 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11110 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11112 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11114 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11119 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11121 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11123 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11126 /* The generic code based on the glibc implementation:
11127 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11128 allowing accelerated copying there)
11129 - copy the data using rep movsl
11130 - copy the rest. */
11135 rtx srcmem, dstmem;
11136 int desired_alignment = (TARGET_PENTIUMPRO
11137 && (count == 0 || count >= (unsigned int) 260)
11138 ? 8 : UNITS_PER_WORD);
11139 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11140 dst = change_address (dst, BLKmode, destreg);
11141 src = change_address (src, BLKmode, srcreg);
11143 /* In case we don't know anything about the alignment, default to
11144 library version, since it is usually equally fast and result in
11147 Also emit call when we know that the count is large and call overhead
11148 will not be important. */
11149 if (!TARGET_INLINE_ALL_STRINGOPS
11150 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11153 if (TARGET_SINGLE_STRINGOP)
11154 emit_insn (gen_cld ());
11156 countreg2 = gen_reg_rtx (Pmode);
11157 countreg = copy_to_mode_reg (counter_mode, count_exp);
11159 /* We don't use loops to align destination and to copy parts smaller
11160 than 4 bytes, because gcc is able to optimize such code better (in
11161 the case the destination or the count really is aligned, gcc is often
11162 able to predict the branches) and also it is friendlier to the
11163 hardware branch prediction.
11165 Using loops is beneficial for generic case, because we can
11166 handle small counts using the loops. Many CPUs (such as Athlon)
11167 have large REP prefix setup costs.
11169 This is quite costly. Maybe we can revisit this decision later or
11170 add some customizability to this code. */
11172 if (count == 0 && align < desired_alignment)
11174 label = gen_label_rtx ();
11175 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11176 LEU, 0, counter_mode, 1, label);
11180 rtx label = ix86_expand_aligntest (destreg, 1);
11181 srcmem = change_address (src, QImode, srcreg);
11182 dstmem = change_address (dst, QImode, destreg);
11183 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11184 ix86_adjust_counter (countreg, 1);
11185 emit_label (label);
11186 LABEL_NUSES (label) = 1;
11190 rtx label = ix86_expand_aligntest (destreg, 2);
11191 srcmem = change_address (src, HImode, srcreg);
11192 dstmem = change_address (dst, HImode, destreg);
11193 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11194 ix86_adjust_counter (countreg, 2);
11195 emit_label (label);
11196 LABEL_NUSES (label) = 1;
11198 if (align <= 4 && desired_alignment > 4)
11200 rtx label = ix86_expand_aligntest (destreg, 4);
11201 srcmem = change_address (src, SImode, srcreg);
11202 dstmem = change_address (dst, SImode, destreg);
11203 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11204 ix86_adjust_counter (countreg, 4);
11205 emit_label (label);
11206 LABEL_NUSES (label) = 1;
11209 if (label && desired_alignment > 4 && !TARGET_64BIT)
11211 emit_label (label);
11212 LABEL_NUSES (label) = 1;
11215 if (!TARGET_SINGLE_STRINGOP)
11216 emit_insn (gen_cld ());
11219 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11221 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11225 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11226 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11228 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11229 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11230 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11231 countreg2, destexp, srcexp));
11235 emit_label (label);
11236 LABEL_NUSES (label) = 1;
11238 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11240 srcmem = change_address (src, SImode, srcreg);
11241 dstmem = change_address (dst, SImode, destreg);
11242 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11244 if ((align <= 4 || count == 0) && TARGET_64BIT)
11246 rtx label = ix86_expand_aligntest (countreg, 4);
11247 srcmem = change_address (src, SImode, srcreg);
11248 dstmem = change_address (dst, SImode, destreg);
11249 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11250 emit_label (label);
11251 LABEL_NUSES (label) = 1;
11253 if (align > 2 && count != 0 && (count & 2))
11255 srcmem = change_address (src, HImode, srcreg);
11256 dstmem = change_address (dst, HImode, destreg);
11257 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11259 if (align <= 2 || count == 0)
11261 rtx label = ix86_expand_aligntest (countreg, 2);
11262 srcmem = change_address (src, HImode, srcreg);
11263 dstmem = change_address (dst, HImode, destreg);
11264 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11265 emit_label (label);
11266 LABEL_NUSES (label) = 1;
11268 if (align > 1 && count != 0 && (count & 1))
11270 srcmem = change_address (src, QImode, srcreg);
11271 dstmem = change_address (dst, QImode, destreg);
11272 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11274 if (align <= 1 || count == 0)
11276 rtx label = ix86_expand_aligntest (countreg, 1);
11277 srcmem = change_address (src, QImode, srcreg);
11278 dstmem = change_address (dst, QImode, destreg);
11279 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11280 emit_label (label);
11281 LABEL_NUSES (label) = 1;
11288 /* Expand string clear operation (bzero). Use i386 string operations when
11289 profitable. expand_movstr contains similar code. */
11291 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11293 rtx destreg, zeroreg, countreg, destexp;
11294 enum machine_mode counter_mode;
11295 HOST_WIDE_INT align = 0;
11296 unsigned HOST_WIDE_INT count = 0;
11298 if (GET_CODE (align_exp) == CONST_INT)
11299 align = INTVAL (align_exp);
11301 /* Can't use any of this if the user has appropriated esi. */
11302 if (global_regs[4])
11305 /* This simple hack avoids all inlining code and simplifies code below. */
11306 if (!TARGET_ALIGN_STRINGOPS)
11309 if (GET_CODE (count_exp) == CONST_INT)
11311 count = INTVAL (count_exp);
11312 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11315 /* Figure out proper mode for counter. For 32bits it is always SImode,
11316 for 64bits use SImode when possible, otherwise DImode.
11317 Set count to number of bytes copied when known at compile time. */
11318 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11319 || x86_64_zero_extended_value (count_exp))
11320 counter_mode = SImode;
11322 counter_mode = DImode;
11324 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11325 if (destreg != XEXP (dst, 0))
11326 dst = replace_equiv_address_nv (dst, destreg);
11328 emit_insn (gen_cld ());
11330 /* When optimizing for size emit simple rep ; movsb instruction for
11331 counts not divisible by 4. */
11333 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11335 countreg = ix86_zero_extend_to_Pmode (count_exp);
11336 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11337 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11338 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11340 else if (count != 0
11342 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11343 || optimize_size || count < (unsigned int) 64))
11345 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11346 unsigned HOST_WIDE_INT offset = 0;
11348 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11349 if (count & ~(size - 1))
11351 countreg = copy_to_mode_reg (counter_mode,
11352 GEN_INT ((count >> (size == 4 ? 2 : 3))
11353 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11354 countreg = ix86_zero_extend_to_Pmode (countreg);
11355 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11356 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11357 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11358 offset = count & ~(size - 1);
11360 if (size == 8 && (count & 0x04))
11362 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11364 emit_insn (gen_strset (destreg, mem,
11365 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11370 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11372 emit_insn (gen_strset (destreg, mem,
11373 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11378 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11380 emit_insn (gen_strset (destreg, mem,
11381 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11388 /* Compute desired alignment of the string operation. */
11389 int desired_alignment = (TARGET_PENTIUMPRO
11390 && (count == 0 || count >= (unsigned int) 260)
11391 ? 8 : UNITS_PER_WORD);
11393 /* In case we don't know anything about the alignment, default to
11394 library version, since it is usually equally fast and result in
11397 Also emit call when we know that the count is large and call overhead
11398 will not be important. */
11399 if (!TARGET_INLINE_ALL_STRINGOPS
11400 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11403 if (TARGET_SINGLE_STRINGOP)
11404 emit_insn (gen_cld ());
11406 countreg2 = gen_reg_rtx (Pmode);
11407 countreg = copy_to_mode_reg (counter_mode, count_exp);
11408 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11409 /* Get rid of MEM_OFFSET, it won't be accurate. */
11410 dst = change_address (dst, BLKmode, destreg);
11412 if (count == 0 && align < desired_alignment)
11414 label = gen_label_rtx ();
11415 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11416 LEU, 0, counter_mode, 1, label);
11420 rtx label = ix86_expand_aligntest (destreg, 1);
11421 emit_insn (gen_strset (destreg, dst,
11422 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11423 ix86_adjust_counter (countreg, 1);
11424 emit_label (label);
11425 LABEL_NUSES (label) = 1;
11429 rtx label = ix86_expand_aligntest (destreg, 2);
11430 emit_insn (gen_strset (destreg, dst,
11431 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11432 ix86_adjust_counter (countreg, 2);
11433 emit_label (label);
11434 LABEL_NUSES (label) = 1;
11436 if (align <= 4 && desired_alignment > 4)
11438 rtx label = ix86_expand_aligntest (destreg, 4);
11439 emit_insn (gen_strset (destreg, dst,
11441 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11443 ix86_adjust_counter (countreg, 4);
11444 emit_label (label);
11445 LABEL_NUSES (label) = 1;
11448 if (label && desired_alignment > 4 && !TARGET_64BIT)
11450 emit_label (label);
11451 LABEL_NUSES (label) = 1;
11455 if (!TARGET_SINGLE_STRINGOP)
11456 emit_insn (gen_cld ());
11459 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11461 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11465 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11466 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11468 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11469 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11473 emit_label (label);
11474 LABEL_NUSES (label) = 1;
11477 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11478 emit_insn (gen_strset (destreg, dst,
11479 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11480 if (TARGET_64BIT && (align <= 4 || count == 0))
11482 rtx label = ix86_expand_aligntest (countreg, 4);
11483 emit_insn (gen_strset (destreg, dst,
11484 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11485 emit_label (label);
11486 LABEL_NUSES (label) = 1;
11488 if (align > 2 && count != 0 && (count & 2))
11489 emit_insn (gen_strset (destreg, dst,
11490 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11491 if (align <= 2 || count == 0)
11493 rtx label = ix86_expand_aligntest (countreg, 2);
11494 emit_insn (gen_strset (destreg, dst,
11495 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11496 emit_label (label);
11497 LABEL_NUSES (label) = 1;
11499 if (align > 1 && count != 0 && (count & 1))
11500 emit_insn (gen_strset (destreg, dst,
11501 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11502 if (align <= 1 || count == 0)
11504 rtx label = ix86_expand_aligntest (countreg, 1);
11505 emit_insn (gen_strset (destreg, dst,
11506 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11507 emit_label (label);
11508 LABEL_NUSES (label) = 1;
11514 /* Expand strlen. */
11516 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11518 rtx addr, scratch1, scratch2, scratch3, scratch4;
11520 /* The generic case of strlen expander is long. Avoid it's
11521 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11523 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11524 && !TARGET_INLINE_ALL_STRINGOPS
11526 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11529 addr = force_reg (Pmode, XEXP (src, 0));
11530 scratch1 = gen_reg_rtx (Pmode);
11532 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11535 /* Well it seems that some optimizer does not combine a call like
11536 foo(strlen(bar), strlen(bar));
11537 when the move and the subtraction is done here. It does calculate
11538 the length just once when these instructions are done inside of
11539 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11540 often used and I use one fewer register for the lifetime of
11541 output_strlen_unroll() this is better. */
11543 emit_move_insn (out, addr);
11545 ix86_expand_strlensi_unroll_1 (out, src, align);
11547 /* strlensi_unroll_1 returns the address of the zero at the end of
11548 the string, like memchr(), so compute the length by subtracting
11549 the start address. */
11551 emit_insn (gen_subdi3 (out, out, addr));
11553 emit_insn (gen_subsi3 (out, out, addr));
11558 scratch2 = gen_reg_rtx (Pmode);
11559 scratch3 = gen_reg_rtx (Pmode);
11560 scratch4 = force_reg (Pmode, constm1_rtx);
11562 emit_move_insn (scratch3, addr);
11563 eoschar = force_reg (QImode, eoschar);
11565 emit_insn (gen_cld ());
11566 src = replace_equiv_address_nv (src, scratch3);
11568 /* If .md starts supporting :P, this can be done in .md. */
11569 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11570 scratch4), UNSPEC_SCAS);
11571 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11574 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11575 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11579 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11580 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
/* NOTE(review): this block is a mangled extraction of GCC i386.c --
   each line carries a stray original-line-number prefix and many
   intermediate lines (return type, braces, TARGET_64BIT conditionals,
   jump targets) were dropped.  Comments below describe only what the
   visible lines show; verify against upstream i386.c before editing.  */
/* Emit an unrolled inline strlen body: first align the pointer OUT to a
   4-byte boundary by byte-wise null checks, then scan 4 bytes at a time
   using a branch-free zero-byte detection formula, and finally adjust
   OUT to point at the terminating null.  */
11586 /* Expand the appropriate insns for doing strlen if not just doing
11589    out = result, initialized with the start address
11590    align_rtx = alignment of the address.
11591    scratch = scratch register, initialized with the startaddress when
11592 not aligned, otherwise undefined
11594 This is just the body. It needs the initializations mentioned above and
11595 some address computing at the end. These things are done in i386.md. */
11598 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11602 rtx align_2_label = NULL_RTX;
11603 rtx align_3_label = NULL_RTX;
11604 rtx align_4_label = gen_label_rtx ();
11605 rtx end_0_label = gen_label_rtx ();
11607 rtx tmpreg = gen_reg_rtx (SImode);
11608 rtx scratch = gen_reg_rtx (SImode);
/* Only a CONST_INT alignment is usable at compile time.  */
11612 if (GET_CODE (align_rtx) == CONST_INT)
11613 align = INTVAL (align_rtx);
11615 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11617 /* Is there a known alignment and is it less than 4? */
11620 rtx scratch1 = gen_reg_rtx (Pmode);
11621 emit_move_insn (scratch1, out);
11622 /* Is there a known alignment and is it not 2? */
11625 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11626 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11628 /* Leave just the 3 lower bits. */
11629 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11630 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> two bytes to
   check, >2 -> three bytes to check.  */
11632 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11633 Pmode, 1, align_4_label);
11634 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11635 Pmode, 1, align_2_label);
11636 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11637 Pmode, 1, align_3_label);
11641 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11642 check if is aligned to 4 - byte. */
11644 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11645 NULL_RTX, 0, OPTAB_WIDEN);
11647 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11648 Pmode, 1, align_4_label);
11651 mem = change_address (src, QImode, out);
11653 /* Now compare the bytes. */
11655 /* Compare the first n unaligned byte on a byte per byte basis. */
11656 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11657 QImode, 1, end_0_label);
11659 /* Increment the address. */
/* NOTE(review): the adddi3/addsi3 pairs below are presumably the two
   arms of a dropped `if (TARGET_64BIT) ... else ...` -- confirm.  */
11661 emit_insn (gen_adddi3 (out, out, const1_rtx));
11663 emit_insn (gen_addsi3 (out, out, const1_rtx));
11665 /* Not needed with an alignment of 2 */
11668 emit_label (align_2_label);
11670 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11674 emit_insn (gen_adddi3 (out, out, const1_rtx));
11676 emit_insn (gen_addsi3 (out, out, const1_rtx));
11678 emit_label (align_3_label);
11681 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11685 emit_insn (gen_adddi3 (out, out, const1_rtx));
11687 emit_insn (gen_addsi3 (out, out, const1_rtx));
11690 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11691 align this loop. It gives only huge programs, but does not help to
11693 emit_label (align_4_label);
11695 mem = change_address (src, SImode, out);
11696 emit_move_insn (scratch, mem);
11698 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11700 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
/* Branch-free zero-byte test: (x - 0x01010101) & ~x & 0x80808080 is
   nonzero iff some byte of x is zero.  */
11702 /* This formula yields a nonzero result iff one of the bytes is zero.
11703 This saves three branches inside loop and many cycles. */
11705 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)))
11706 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11707 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11708 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11709 gen_int_mode (0x80808080, SImode)));
11710 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Locate which of the four bytes was the zero, adjusting OUT by 0-3.
   One path uses flag-preserving cmoves, the other an explicit jump.  */
11715 rtx reg = gen_reg_rtx (SImode);
11716 rtx reg2 = gen_reg_rtx (Pmode);
11717 emit_move_insn (reg, tmpreg);
11718 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11720 /* If zero is not in the first two bytes, move two bytes forward. */
11721 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11722 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11723 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11724 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11725 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11728 /* Emit lea manually to avoid clobbering of flags. */
11729 emit_insn (gen_rtx_SET (SImode, reg2,
11730 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11732 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11733 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11734 emit_insn (gen_rtx_SET (VOIDmode, out,
11735 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11742 rtx end_2_label = gen_label_rtx ();
11743 /* Is zero in the first two bytes? */
11745 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11746 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11747 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11748 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11749 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11751 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11752 JUMP_LABEL (tmp) = end_2_label;
11754 /* Not in the first two. Move two bytes forward. */
11755 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11757 emit_insn (gen_adddi3 (out, out, const2_rtx));
11759 emit_insn (gen_addsi3 (out, out, const2_rtx));
11761 emit_label (end_2_label);
11765 /* Avoid branch in fixing the byte. */
/* addqi3_cc shifts the high bit of the low byte into the carry flag;
   the subsequent subtract-with-borrow picks 3 or 2 off OUT.  */
11766 tmpreg = gen_lowpart (QImode, tmpreg);
11767 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11768 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11770 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11772 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11774 emit_label (end_0_label);
/* Emit the RTL for a call (or sibcall) to FNADDR, returning RETVAL if
   non-null.  POP, when set, is the number of argument bytes the callee
   pops; CALLARG2 carries the SSE-register count loaded into AL for
   64-bit varargs calls.  NOTE(review): mangled extraction -- the return
   type line, braces and several conditionals (including the matching
   `#if TARGET_MACHO` for the #endif below) were dropped by whatever
   produced this text; verify against upstream i386.c.  */
11778 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11779 rtx callarg2 ATTRIBUTE_UNUSED,
11780 rtx pop, int sibcall)
11782 rtx use = NULL, call;
11784 if (pop == const0_rtx)
11786 if (TARGET_64BIT && pop)
/* Darwin: route symbolic calls through the Mach-O indirection stub.  */
11790 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11791 fnaddr = machopic_indirect_call_target (fnaddr);
11793 /* Static functions and indirect calls don't need the pic register. */
11794 if (! TARGET_64BIT && flag_pic
11795 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11796 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11797 use_reg (&use, pic_offset_table_rtx)
/* 64-bit varargs ABI: AL holds the number of SSE registers used.  */
11799 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11801 rtx al = gen_rtx_REG (QImode, 0);
11802 emit_move_insn (al, callarg2);
11803 use_reg (&use, al);
11805 #endif /* TARGET_MACHO */
/* Force an address the call patterns cannot take into a register.  */
11807 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11809 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11810 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must go through R11 (a call-clobbered register not
   used for argument passing).  */
11812 if (sibcall && TARGET_64BIT
11813 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11816 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11817 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11818 emit_move_insn (fnaddr, addr);
11819 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11822 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11824 call = gen_rtx_SET (VOIDmode, retval, call);
/* Represent the callee popping POP bytes as a parallel stack adjust.  */
11827 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11828 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11829 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11832 call = emit_call_insn (call);
11834 CALL_INSN_FUNCTION_USAGE (call) = use;
11838 /* Clear stack slot assignments remembered from previous functions.
11839 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and zero a fresh per-function machine_function record.
   use_fast_prologue_epilogue_nregs starts at -1 meaning "not yet
   computed".  NOTE(review): the trailing `return f;` and closing brace
   appear to have been dropped by the extraction.  */
11842 static struct machine_function *
11843 ix86_init_machine_status (void)
11845 struct machine_function *f;
11847 f = ggc_alloc_cleared (sizeof (struct machine_function));
11848 f->use_fast_prologue_epilogue_nregs = -1;
11853 /* Return a MEM corresponding to a stack slot with mode MODE.
11854 Allocate a new slot if necessary.
11856 The RTL for a function can have several slots available: N is
11857 which slot to use. */
/* Slots are cached per (mode, n) on the GC-allocated ix86_stack_locals
   list, so repeated requests reuse the same stack slot.  NOTE(review):
   the bounds-check failure action and the `return s->rtl;` lines were
   dropped by the extraction.  */
11860 assign_386_stack_local (enum machine_mode mode, int n)
11862 struct stack_local_entry *s;
11864 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Reuse an already-allocated slot if one matches.  */
11867 for (s = ix86_stack_locals; s; s = s->next)
11868 if (s->mode == mode && s->n == n)
11871 s = (struct stack_local_entry *)
11872 ggc_alloc (sizeof (struct stack_local_entry));
11875 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Push the new entry on the head of the cache list.  */
11877 s->next = ix86_stack_locals;
11878 ix86_stack_locals = s;
11882 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Lazily created and cached in the GC root ix86_tls_symbol.  The GNU
   TLS dialect on 32-bit uses the triple-underscore ___tls_get_addr
   entry point; everything else uses __tls_get_addr.  */
11884 static GTY(()) rtx ix86_tls_symbol;
11886 ix86_tls_get_addr (void)
11889 if (!ix86_tls_symbol)
11891 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11892 (TARGET_GNU_TLS && !TARGET_64BIT)
11893 ? "___tls_get_addr"
11894 : "__tls_get_addr");
11897 return ix86_tls_symbol;
11900 /* Calculate the length of the memory address in the instruction
11901 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Classifies the decomposed address into the x86 encoding cases
   (register-indirect, absolute, base+disp, base+index) to count the
   extra SIB/displacement bytes.  NOTE(review): the return statements
   and the `len = 0` initialization were dropped by the extraction;
   verify byte counts against upstream.  */
11904 memory_address_length (rtx addr)
11906 struct ix86_address parts;
11907 rtx base, index, disp;
/* Auto-modified addresses encode no explicit address bytes here.  */
11910 if (GET_CODE (addr) == PRE_DEC
11911 || GET_CODE (addr) == POST_INC
11912 || GET_CODE (addr) == PRE_MODIFY
11913 || GET_CODE (addr) == POST_MODIFY)
11916 if (! ix86_decompose_address (addr, &parts))
11920 index = parts.index;
11925 - esp as the base always wants an index,
11926 - ebp as the base always wants a displacement. */
11928 /* Register Indirect. */
11929 if (base && !index && !disp)
11931 /* esp (for its index) and ebp (for its displacement) need
11932 the two-byte modrm form. */
11933 if (addr == stack_pointer_rtx
11934 || addr == arg_pointer_rtx
11935 || addr == frame_pointer_rtx
11936 || addr == hard_frame_pointer_rtx)
11940 /* Direct Addressing. */
11941 else if (disp && !base && !index)
11946 /* Find the length of the displacement constant. */
/* 'K' constraint: signed 8-bit immediate, i.e. a one-byte disp.  */
11949 if (GET_CODE (disp) == CONST_INT
11950 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11956 /* ebp always wants a displacement. */
11957 else if (base == hard_frame_pointer_rtx)
11960 /* An index requires the two-byte modrm form.... */
11962 /* ...like esp, which always wants an index. */
11963 || base == stack_pointer_rtx
11964 || base == arg_pointer_rtx
11965 || base == frame_pointer_rtx)
11972 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11973 is set, expect that insn have 8bit immediate alternative. */
/* Scans the insn's operands for constants; short-form 8-bit immediates
   ('K' constraint) count as one byte, otherwise the size follows the
   insn's mode attribute.  NOTE(review): the loop body, switch cases
   and return values were dropped by the extraction.  */
11975 ix86_attr_length_immediate_default (rtx insn, int shortform)
11979 extract_insn_cached (insn);
11980 for (i = recog_data.n_operands - 1; i >= 0; --i)
11981 if (CONSTANT_P (recog_data.operand[i]))
11986 && GET_CODE (recog_data.operand[i]) == CONST_INT
11987 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11991 switch (get_attr_mode (insn))
12002 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12007 fatal_insn ("unknown insn mode", insn);
12013 /* Compute default value for "length_address" attribute. */
/* LEA is special: its "memory" operand is really the SET_SRC address
   expression.  Otherwise measure the first MEM operand found.
   NOTE(review): lines dropped by the extraction (e.g. the SET_SRC
   unwrap for the plain-SET case and the final `return 0;`).  */
12015 ix86_attr_length_address_default (rtx insn)
12019 if (get_attr_type (insn) == TYPE_LEA)
12021 rtx set = PATTERN (insn);
12022 if (GET_CODE (set) == SET)
12024 else if (GET_CODE (set) == PARALLEL
12025 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12026 set = XVECEXP (set, 0, 0);
12029 #ifdef ENABLE_CHECKING
12035 return memory_address_length (SET_SRC (set));
12038 extract_insn_cached (insn);
12039 for (i = recog_data.n_operands - 1; i >= 0; --i)
12040 if (GET_CODE (recog_data.operand[i]) == MEM)
12042 return memory_address_length (XEXP (recog_data.operand[i], 0));
12048 /* Return the maximum number of instructions a cpu can issue. */
/* Dispatches on ix86_tune.  NOTE(review): the switch header, the
   per-processor return values and the default case were dropped by the
   extraction -- this fragment is not compilable as shown.  */
12051 ix86_issue_rate (void)
12055 case PROCESSOR_PENTIUM:
12059 case PROCESSOR_PENTIUMPRO:
12060 case PROCESSOR_PENTIUM4:
12061 case PROCESSOR_ATHLON:
12070 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12071 by DEP_INSN and nothing set by DEP_INSN. */
12074 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12078 /* Simplify the test for uninteresting insns. */
12079 if (insn_type != TYPE_SETCC
12080 && insn_type != TYPE_ICMOV
12081 && insn_type != TYPE_FCMOV
12082 && insn_type != TYPE_IBR)
12085 if ((set = single_set (dep_insn)) != 0)
12087 set = SET_DEST (set);
12090 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12091 && XVECLEN (PATTERN (dep_insn), 0) == 2
12092 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12093 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12095 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12096 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12101 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12104 /* This test is true if the dependent insn reads the flags but
12105 not any other potentially set register. */
12106 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12109 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12115 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12116 address with operands set by DEP_INSN. */
/* For LEA the "address" is the SET_SRC expression itself; for other
   insns it is the address of the first MEM operand.  NOTE(review):
   the LEA-condition continuation, early returns, and the `else`
   introducing the MEM scan were dropped by the extraction.  */
12119 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12123 if (insn_type == TYPE_LEA
12126 addr = PATTERN (insn);
12127 if (GET_CODE (addr) == SET)
12129 else if (GET_CODE (addr) == PARALLEL
12130 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12131 addr = XVECEXP (addr, 0, 0);
12134 addr = SET_SRC (addr);
12139 extract_insn_cached (insn);
12140 for (i = recog_data.n_operands - 1; i >= 0; --i)
12141 if (GET_CODE (recog_data.operand[i]) == MEM)
12143 addr = XEXP (recog_data.operand[i], 0);
/* AGI stall exists iff DEP_INSN writes something the address reads.  */
12150 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the dependence COST between DEP_INSN and INSN
   according to per-processor pipeline quirks (AGI stalls, flag pairing,
   load+op latencies, INT->FP conversion, load hiding by the reorder
   buffer).  NOTE(review): mangled extraction -- the return type, the
   switch header on ix86_tune, brace structure, most `return`/`break`
   statements and several cost adjustments were dropped; verify against
   upstream i386.c before relying on any comment below.  */
12154 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12156 enum attr_type insn_type, dep_insn_type;
12157 enum attr_memory memory, dep_memory;
12159 int dep_insn_code_number;
12161 /* Anti and output dependencies have zero cost on all CPUs. */
12162 if (REG_NOTE_KIND (link) != 0)
12165 dep_insn_code_number = recog_memoized (dep_insn);
12167 /* If we can't recognize the insns, we can't really do anything. */
12168 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12171 insn_type = get_attr_type (insn);
12172 dep_insn_type = get_attr_type (dep_insn);
12176 case PROCESSOR_PENTIUM:
12177 /* Address Generation Interlock adds a cycle of latency. */
12178 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12181 /* ??? Compares pair with jump/setcc. */
12182 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12185 /* Floating point stores require value to be ready one cycle earlier. */
12186 if (insn_type == TYPE_FMOV
12187 && get_attr_memory (insn) == MEMORY_STORE
12188 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12192 case PROCESSOR_PENTIUMPRO:
12193 memory = get_attr_memory (insn);
12194 dep_memory = get_attr_memory (dep_insn);
12196 /* Since we can't represent delayed latencies of load+operation,
12197 increase the cost here for non-imov insns. */
12198 if (dep_insn_type != TYPE_IMOV
12199 && dep_insn_type != TYPE_FMOV
12200 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12203 /* INT->FP conversion is expensive. */
12204 if (get_attr_fp_int_src (dep_insn))
12207 /* There is one cycle extra latency between an FP op and a store. */
12208 if (insn_type == TYPE_FMOV
12209 && (set = single_set (dep_insn)) != NULL_RTX
12210 && (set2 = single_set (insn)) != NULL_RTX
12211 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12212 && GET_CODE (SET_DEST (set2)) == MEM)
12215 /* Show ability of reorder buffer to hide latency of load by executing
12216 in parallel with previous instruction in case
12217 previous instruction is not needed to compute the address. */
12218 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12219 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12221 /* Claim moves to take one cycle, as core can issue one load
12222 at time and the next load can start cycle later. */
12223 if (dep_insn_type == TYPE_IMOV
12224 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label for the processor handled below
   (presumably PROCESSOR_K6) was dropped by the extraction.  */
12232 memory = get_attr_memory (insn);
12233 dep_memory = get_attr_memory (dep_insn);
12234 /* The esp dependency is resolved before the instruction is really
12236 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12237 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12240 /* Since we can't represent delayed latencies of load+operation,
12241 increase the cost here for non-imov insns. */
12242 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12243 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12245 /* INT->FP conversion is expensive. */
12246 if (get_attr_fp_int_src (dep_insn))
12249 /* Show ability of reorder buffer to hide latency of load by executing
12250 in parallel with previous instruction in case
12251 previous instruction is not needed to compute the address. */
12252 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12253 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12255 /* Claim moves to take one cycle, as core can issue one load
12256 at time and the next load can start cycle later. */
12257 if (dep_insn_type == TYPE_IMOV
12258 || dep_insn_type == TYPE_FMOV)
12267 case PROCESSOR_ATHLON:
12269 memory = get_attr_memory (insn);
12270 dep_memory = get_attr_memory (dep_insn);
12272 /* Show ability of reorder buffer to hide latency of load by executing
12273 in parallel with previous instruction in case
12274 previous instruction is not needed to compute the address. */
12275 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12276 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12278 enum attr_unit unit = get_attr_unit (insn);
12281 /* Because of the difference between the length of integer and
12282 floating unit pipeline preparation stages, the memory operands
12283 for floating point are cheaper.
12285 ??? For Athlon it the difference is most probably 2. */
12286 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12289 loadcost = TARGET_ATHLON ? 2 : 0;
12291 if (cost >= loadcost)
/* Per-cycle PPro scheduling state.  NOTE(review): the extraction
   dropped the opening brace, the decode[3] insn array used throughout
   the functions below, and the closing brace.  */
12306 struct ppro_sched_data
12309 int issued_this_cycle;
/* Return the PPro uop class of INSN, treating unrecognizable insns
   conservatively as "many uops" (decoder 0 only).  */
12313 static enum attr_ppro_uops
12314 ix86_safe_ppro_uops (rtx insn)
12316 if (recog_memoized (insn) >= 0)
12317 return get_attr_ppro_uops (insn);
12319 return PPRO_UOPS_MANY;
/* Dump the INSN_UIDs of the current PPro decode packet (up to three
   decoders) to DUMP, if decoder 0 is occupied.  */
12323 ix86_dump_ppro_packet (FILE *dump)
12325 if (ix86_sched_data.ppro.decode[0])
12327 fprintf (dump, "PPRO packet: %d",
12328 INSN_UID (ix86_sched_data.ppro.decode[0]));
12329 if (ix86_sched_data.ppro.decode[1])
12330 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12331 if (ix86_sched_data.ppro.decode[2])
12332 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12333 fputc ('\n', dump);
12337 /* We're beginning a new block. Initialize data structures as necessary. */
/* Scheduler hook: reset all i386 scheduling state to zero at the start
   of each scheduling region.  */
12340 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12341 int sched_verbose ATTRIBUTE_UNUSED,
12342 int veclen ATTRIBUTE_UNUSED)
12344 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12347 /* Shift INSN to SLOT, and shift everything else down. */
/* NOTE(review): the extraction dropped the saved-insn temporary and
   the final store into *SLOT; as visible this is only the shifting
   loop.  */
12350 ix86_reorder_insn (rtx *insnp, rtx *slot)
12356 insnp[0] = insnp[1];
12357 while (++insnp != slot);
/* Reorder the ready list for PPro: pick a 2-4 uop insn for decoder 0
   and simple 1-uop insns for decoders 1 and 2, mirroring the 4-1-1
   decode template.  READY points at the lowest-priority end,
   E_READY at the highest.  NOTE(review): several lines (decode[]
   declaration, insnp initialization, some braces/gotos) were dropped
   by the extraction.  */
12363 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12366 enum attr_ppro_uops cur_uops;
12367 int issued_this_cycle;
12371 /* At this point .ppro.decode contains the state of the three
12372 decoders from last "cycle". That is, those insns that were
12373 actually independent. But here we're scheduling for the
12374 decoder, and we may find things that are decodable in the
12377 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12378 issued_this_cycle = 0;
12381 cur_uops = ix86_safe_ppro_uops (*insnp);
12383 /* If the decoders are empty, and we've a complex insn at the
12384 head of the priority queue, let it issue without complaint. */
12385 if (decode[0] == NULL)
12387 if (cur_uops == PPRO_UOPS_MANY)
12389 decode[0] = *insnp;
12393 /* Otherwise, search for a 2-4 uop unsn to issue. */
12394 while (cur_uops != PPRO_UOPS_FEW)
12396 if (insnp == ready)
12398 cur_uops = ix86_safe_ppro_uops (*--insnp);
12401 /* If so, move it to the head of the line. */
12402 if (cur_uops == PPRO_UOPS_FEW)
12403 ix86_reorder_insn (insnp, e_ready);
12405 /* Issue the head of the queue. */
12406 issued_this_cycle = 1;
12407 decode[0] = *e_ready--;
12410 /* Look for simple insns to fill in the other two slots. */
12411 for (i = 1; i < 3; ++i)
12412 if (decode[i] == NULL)
12414 if (ready > e_ready)
12418 cur_uops = ix86_safe_ppro_uops (*insnp);
12419 while (cur_uops != PPRO_UOPS_ONE)
12421 if (insnp == ready)
12423 cur_uops = ix86_safe_ppro_uops (*--insnp);
12426 /* Found one. Move it to the head of the queue and issue it. */
12427 if (cur_uops == PPRO_UOPS_ONE)
12429 ix86_reorder_insn (insnp, e_ready);
12430 decode[i] = *e_ready--;
12431 issued_this_cycle++;
12435 /* ??? Didn't find one. Ideally, here we would do a lazy split
12436 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issued insn to keep the scheduler
   making progress.  */
12440 if (issued_this_cycle == 0)
12441 issued_this_cycle = 1;
12442 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12445 /* We are about to being issuing insns for this clock cycle.
12446 Override the default sort algorithm to better slot instructions. */
/* Scheduler reorder hook: delegates to the PPro-specific reordering
   when tuning for PentiumPro, otherwise leaves the ready list alone.
   Returns the cpu issue rate.  NOTE(review): the switch header, the
   early-out for short/empty ready lists and the default case were
   dropped by the extraction.  */
12448 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12449 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12450 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12452 int n_ready = *n_readyp;
12453 rtx *e_ready = ready + n_ready - 1;
12455 /* Make sure to go ahead and initialize key items in
12456 ix86_sched_data if we are not going to bother trying to
12457 reorder the ready queue. */
12460 ix86_sched_data.ppro.issued_this_cycle = 1;
12469 case PROCESSOR_PENTIUMPRO:
12470 ix86_sched_reorder_ppro (ready, e_ready);
12475 return ix86_issue_rate ();
12478 /* We are about to issue INSN. Return the number of insns left on the
12479 ready queue that can be issued this cycle. */
/* For PPro this also retires/advances the three-decoder packet model:
   a many-uop insn monopolizes decoder 0, a few-uop insn starts a new
   packet, and one-uop insns fill free decoder slots.  NOTE(review):
   the switch header, default case and some dump-guard lines were
   dropped by the extraction.  */
12482 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12483 int can_issue_more)
12489 return can_issue_more - 1;
12491 case PROCESSOR_PENTIUMPRO:
12493 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12495 if (uops == PPRO_UOPS_MANY)
12498 ix86_dump_ppro_packet (dump);
12499 ix86_sched_data.ppro.decode[0] = insn;
12500 ix86_sched_data.ppro.decode[1] = NULL;
12501 ix86_sched_data.ppro.decode[2] = NULL;
12503 ix86_dump_ppro_packet (dump);
12504 ix86_sched_data.ppro.decode[0] = NULL;
12506 else if (uops == PPRO_UOPS_FEW)
12509 ix86_dump_ppro_packet (dump);
12510 ix86_sched_data.ppro.decode[0] = insn;
12511 ix86_sched_data.ppro.decode[1] = NULL;
12512 ix86_sched_data.ppro.decode[2] = NULL;
/* One-uop insn: place it in the first free decoder slot.  */
12516 for (i = 0; i < 3; ++i)
12517 if (ix86_sched_data.ppro.decode[i] == NULL)
12519 ix86_sched_data.ppro.decode[i] = insn;
/* All three decoders full: flush the packet and start fresh.  */
12527 ix86_dump_ppro_packet (dump);
12528 ix86_sched_data.ppro.decode[0] = NULL;
12529 ix86_sched_data.ppro.decode[1] = NULL;
12530 ix86_sched_data.ppro.decode[2] = NULL;
12534 return --ix86_sched_data.ppro.issued_this_cycle;
/* Target hook: whether to use the DFA pipeline scheduler description.
   NOTE(review): the return statements were dropped by the extraction;
   the visible test selects Pentium and Athlon/K8 tuning.  */
12539 ia32_use_dfa_pipeline_interface (void)
12541 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12546 /* How many alternative schedules to try. This should be as wide as the
12547 scheduling freedom in the DFA, but no wider. Making this value too
12548 large results extra work for the scheduler. */
/* NOTE(review): return values were dropped by the extraction; only the
   Pentium special-case test is visible.  */
12551 ia32_multipass_dfa_lookahead (void)
12553 if (ix86_tune == PROCESSOR_PENTIUM)
12560 /* Compute the alignment given to a constant that is being placed in memory.
12561 EXP is the constant and ALIGN is the alignment that the object would
12563 The value of this function is used instead of that alignment to align
/* Boosts alignment of DFmode/128-bit-mode REAL_CSTs and of long string
   constants (when not optimizing for size).  NOTE(review): the return
   values for the REAL_CST branches and the final `return align;` were
   dropped by the extraction.  */
12567 ix86_constant_alignment (tree exp, int align)
12569 if (TREE_CODE (exp) == REAL_CST)
12571 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12573 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12576 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12577 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12578 return BITS_PER_WORD;
12583 /* Compute the alignment for a static variable.
12584 TYPE is the data type, and ALIGN is the alignment that
12585 the object would ordinarily have. The value of this function is used
12586 instead of that alignment to align the object. */
/* Boosts alignment for large aggregates, 64-bit arrays for x86-64 ABI
   requirements, and double/long-double scalars, complex types and
   records whose first field is such a type.  NOTE(review): return
   values and the TARGET_64BIT guard were dropped by the extraction.  */
12589 ix86_data_alignment (tree type, int align)
12591 if (AGGREGATE_TYPE_P (type)
12592 && TYPE_SIZE (type)
12593 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12594 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12595 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12598 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12599 to 16byte boundary. */
12602 if (AGGREGATE_TYPE_P (type)
12603 && TYPE_SIZE (type)
12604 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12605 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12606 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12610 if (TREE_CODE (type) == ARRAY_TYPE)
12612 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12614 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12617 else if (TREE_CODE (type) == COMPLEX_TYPE)
12620 if (TYPE_MODE (type) == DCmode && align < 64)
12622 if (TYPE_MODE (type) == XCmode && align < 128)
12625 else if ((TREE_CODE (type) == RECORD_TYPE
12626 || TREE_CODE (type) == UNION_TYPE
12627 || TREE_CODE (type) == QUAL_UNION_TYPE)
12628 && TYPE_FIELDS (type))
12630 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12632 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12635 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12636 || TREE_CODE (type) == INTEGER_TYPE)
12638 if (TYPE_MODE (type) == DFmode && align < 64)
12640 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12647 /* Compute the alignment for a local variable.
12648 TYPE is the data type, and ALIGN is the alignment that
12649 the object would ordinarily have. The value of this macro is used
12650 instead of that alignment to align the object. */
/* Same shape as ix86_data_alignment but with the thresholds used for
   stack locals.  NOTE(review): return values and the TARGET_64BIT
   guard were dropped by the extraction.  */
12653 ix86_local_alignment (tree type, int align)
12655 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12656 to 16byte boundary. */
12659 if (AGGREGATE_TYPE_P (type)
12660 && TYPE_SIZE (type)
12661 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12662 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12663 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12666 if (TREE_CODE (type) == ARRAY_TYPE)
12668 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12670 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12673 else if (TREE_CODE (type) == COMPLEX_TYPE)
12675 if (TYPE_MODE (type) == DCmode && align < 64)
12677 if (TYPE_MODE (type) == XCmode && align < 128)
12680 else if ((TREE_CODE (type) == RECORD_TYPE
12681 || TREE_CODE (type) == UNION_TYPE
12682 || TREE_CODE (type) == QUAL_UNION_TYPE)
12683 && TYPE_FIELDS (type))
12685 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12687 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12690 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12691 || TREE_CODE (type) == INTEGER_TYPE)
12694 if (TYPE_MODE (type) == DFmode && align < 64)
12696 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12702 /* Emit RTL insns to initialize the variable parts of a trampoline.
12703 FNADDR is an RTX for the address of the function's pure code.
12704 CXT is an RTX for the static chain value for the function. */
/* 32-bit layout: `movl $cxt,%ecx` (0xb9) then `jmp disp32` (0xe9).
   64-bit layout: movl/movabs of FNADDR into r11 (0x41bb / 0x49bb),
   movabs of CXT into r10 (0x49ba), then `jmp *%r11` (0x49ff 0xe3).
   NOTE(review): the TARGET_64BIT branch structure, the `offset`
   bookkeeping increments and the abort() body were dropped by the
   extraction.  */
12706 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12710 /* Compute offset from the end of the jmp to the target function. */
12711 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12712 plus_constant (tramp, 10),
12713 NULL_RTX, 1, OPTAB_DIRECT);
12714 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12715 gen_int_mode (0xb9, QImode));
12716 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12717 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12718 gen_int_mode (0xe9, QImode));
12719 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12724 /* Try to load address using shorter movl instead of movabs.
12725 We may want to support movq for kernel mode, but kernel does not use
12726 trampolines at the moment. */
12727 if (x86_64_zero_extended_value (fnaddr))
12729 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12730 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12731 gen_int_mode (0xbb41, HImode));
12732 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12733 gen_lowpart (SImode, fnaddr));
12738 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12739 gen_int_mode (0xbb49, HImode));
12740 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12744 /* Load static chain using movabs to r10. */
12745 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12746 gen_int_mode (0xba49, HImode));
12747 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12750 /* Jump to the r11 */
12751 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12752 gen_int_mode (0xff49, HImode));
12753 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12754 gen_int_mode (0xe3, QImode));
/* Sanity check against overrunning the fixed TRAMPOLINE_SIZE.  */
12756 if (offset > TRAMPOLINE_SIZE)
12760 #ifdef TRANSFER_FROM_TRAMPOLINE
/* Some platforms must explicitly make the trampoline's stack page
   executable.  */
12761 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12762 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register a target builtin only if its ISA mask is enabled in
   target_flags (and 64-bit-only builtins only on 64-bit targets).  */
12766 #define def_builtin(MASK, NAME, TYPE, CODE) \
12768 if ((MASK) & target_flags \
12769 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12770 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12771 NULL, NULL_TREE); \
/* Table-entry type describing one ia32 builtin: its ISA mask, the insn
   pattern to expand, the user-visible name, the IX86_BUILTIN_* code,
   the rtx comparison code (for compare builtins), and a flag (e.g. to
   request operand swapping).  NOTE(review): braces dropped by the
   extraction.  */
12774 struct builtin_description
12776 const unsigned int mask;
12777 const enum insn_code icode;
12778 const char *const name;
12779 const enum ix86_builtins code;
12780 const enum rtx_code comparison;
12781 const unsigned int flag;
/* comiss/ucomiss (SSE) and comisd/ucomisd (SSE2) scalar-compare
   builtins.  The UNEQ/UNLT/... comparison codes reflect that these
   compares are unordered with respect to NaNs.  NOTE(review): the
   table's opening/closing braces were dropped by the extraction.  */
12784 static const struct builtin_description bdesc_comi[] =
12786 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12787 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12788 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12789 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12790 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12791 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12792 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12793 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12794 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12795 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12796 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12797 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12798 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12812 static const struct builtin_description bdesc_2arg[] =
12815 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12816 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12817 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12818 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12819 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12820 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12821 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12822 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12824 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12825 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12826 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12827 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12828 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12829 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12830 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12831 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12832 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12833 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12834 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12835 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12836 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12837 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12838 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12839 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12840 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12841 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12842 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12843 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12845 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12846 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12847 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12848 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12850 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12851 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12852 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12853 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12855 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12856 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12857 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12858 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12859 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12862 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12863 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12864 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12865 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12866 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12867 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12868 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12869 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12871 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12872 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12873 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12874 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12875 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12876 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12877 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12878 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12880 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12881 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12882 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12884 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12885 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12886 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12887 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12889 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12890 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12892 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12893 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12894 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12895 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12896 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12897 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12899 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12900 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12904 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12905 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12906 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12908 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12909 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12913 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12914 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12916 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12917 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12918 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12920 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12921 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12922 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12923 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12924 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12925 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12927 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12928 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12929 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12930 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12931 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12934 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12935 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12936 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12937 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12939 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12940 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12953 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12954 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12955 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12956 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12957 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12958 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12959 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12960 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12961 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12962 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12963 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12964 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12965 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12966 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12967 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12968 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12969 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12970 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12971 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12973 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* Saturating add/subtract on 128-bit vectors.  These are SSE2
   instructions operating on xmm registers (v16qi/v8hi modes), so they
   must be gated on MASK_SSE2 like every other *128 row in this table;
   the previous MASK_MMX gate wrongly exposed them on MMX-only targets,
   where the 128-bit patterns cannot be emitted.  */
12997 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13069 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13070 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13071 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13074 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13075 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13076 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13077 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13078 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13079 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
/* Table of one-operand MMX/SSE/SSE2/SSE3 builtins.  Row layout matches
   bdesc_2arg: { target mask, insn code, C-level builtin name (0 here
   presumably means the builtin is registered by hand later in
   ix86_init_mmx_sse_builtins -- confirm), ix86 builtin code, 0, 0 }.  */
13082 static const struct builtin_description bdesc_1arg[] =
/* SSE (and the 3DNow!-A subset shared with Athlon) unary operations:
   move-mask, sqrt/reciprocal estimates, and FP<->int conversions.  */
13084 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13085 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13087 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13088 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13089 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13091 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13092 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13093 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13094 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13095 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13096 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
/* SSE2 unary operations and conversions.  The *siq rows are the 64-bit
   destination forms, hence the additional MASK_64BIT gate.  */
13098 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13099 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13100 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13105 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13117 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13118 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13119 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13121 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
/* SSE3 (historically "Prescott New Instructions", hence MASK_PNI)
   duplicate-move operations.  */
13128 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13129 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13130 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13134 ix86_init_builtins (void)
13137 ix86_init_mmx_sse_builtins ();
13140 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13141 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13144 ix86_init_mmx_sse_builtins (void)
13146 const struct builtin_description * d;
13149 tree pchar_type_node = build_pointer_type (char_type_node);
13150 tree pcchar_type_node = build_pointer_type (
13151 build_type_variant (char_type_node, 1, 0));
13152 tree pfloat_type_node = build_pointer_type (float_type_node);
13153 tree pcfloat_type_node = build_pointer_type (
13154 build_type_variant (float_type_node, 1, 0));
13155 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13156 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13157 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13160 tree int_ftype_v4sf_v4sf
13161 = build_function_type_list (integer_type_node,
13162 V4SF_type_node, V4SF_type_node, NULL_TREE);
13163 tree v4si_ftype_v4sf_v4sf
13164 = build_function_type_list (V4SI_type_node,
13165 V4SF_type_node, V4SF_type_node, NULL_TREE);
13166 /* MMX/SSE/integer conversions. */
13167 tree int_ftype_v4sf
13168 = build_function_type_list (integer_type_node,
13169 V4SF_type_node, NULL_TREE);
13170 tree int64_ftype_v4sf
13171 = build_function_type_list (long_long_integer_type_node,
13172 V4SF_type_node, NULL_TREE);
13173 tree int_ftype_v8qi
13174 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13175 tree v4sf_ftype_v4sf_int
13176 = build_function_type_list (V4SF_type_node,
13177 V4SF_type_node, integer_type_node, NULL_TREE);
13178 tree v4sf_ftype_v4sf_int64
13179 = build_function_type_list (V4SF_type_node,
13180 V4SF_type_node, long_long_integer_type_node,
13182 tree v4sf_ftype_v4sf_v2si
13183 = build_function_type_list (V4SF_type_node,
13184 V4SF_type_node, V2SI_type_node, NULL_TREE);
13185 tree int_ftype_v4hi_int
13186 = build_function_type_list (integer_type_node,
13187 V4HI_type_node, integer_type_node, NULL_TREE);
13188 tree v4hi_ftype_v4hi_int_int
13189 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13190 integer_type_node, integer_type_node,
13192 /* Miscellaneous. */
13193 tree v8qi_ftype_v4hi_v4hi
13194 = build_function_type_list (V8QI_type_node,
13195 V4HI_type_node, V4HI_type_node, NULL_TREE);
13196 tree v4hi_ftype_v2si_v2si
13197 = build_function_type_list (V4HI_type_node,
13198 V2SI_type_node, V2SI_type_node, NULL_TREE);
13199 tree v4sf_ftype_v4sf_v4sf_int
13200 = build_function_type_list (V4SF_type_node,
13201 V4SF_type_node, V4SF_type_node,
13202 integer_type_node, NULL_TREE);
13203 tree v2si_ftype_v4hi_v4hi
13204 = build_function_type_list (V2SI_type_node,
13205 V4HI_type_node, V4HI_type_node, NULL_TREE);
13206 tree v4hi_ftype_v4hi_int
13207 = build_function_type_list (V4HI_type_node,
13208 V4HI_type_node, integer_type_node, NULL_TREE);
13209 tree v4hi_ftype_v4hi_di
13210 = build_function_type_list (V4HI_type_node,
13211 V4HI_type_node, long_long_unsigned_type_node,
13213 tree v2si_ftype_v2si_di
13214 = build_function_type_list (V2SI_type_node,
13215 V2SI_type_node, long_long_unsigned_type_node,
13217 tree void_ftype_void
13218 = build_function_type (void_type_node, void_list_node);
13219 tree void_ftype_unsigned
13220 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13221 tree void_ftype_unsigned_unsigned
13222 = build_function_type_list (void_type_node, unsigned_type_node,
13223 unsigned_type_node, NULL_TREE);
13224 tree void_ftype_pcvoid_unsigned_unsigned
13225 = build_function_type_list (void_type_node, const_ptr_type_node,
13226 unsigned_type_node, unsigned_type_node,
13228 tree unsigned_ftype_void
13229 = build_function_type (unsigned_type_node, void_list_node);
13231 = build_function_type (long_long_unsigned_type_node, void_list_node);
13232 tree v4sf_ftype_void
13233 = build_function_type (V4SF_type_node, void_list_node);
13234 tree v2si_ftype_v4sf
13235 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13236 /* Loads/stores. */
13237 tree void_ftype_v8qi_v8qi_pchar
13238 = build_function_type_list (void_type_node,
13239 V8QI_type_node, V8QI_type_node,
13240 pchar_type_node, NULL_TREE);
13241 tree v4sf_ftype_pcfloat
13242 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13243 /* @@@ the type is bogus */
13244 tree v4sf_ftype_v4sf_pv2si
13245 = build_function_type_list (V4SF_type_node,
13246 V4SF_type_node, pv2si_type_node, NULL_TREE);
13247 tree void_ftype_pv2si_v4sf
13248 = build_function_type_list (void_type_node,
13249 pv2si_type_node, V4SF_type_node, NULL_TREE);
13250 tree void_ftype_pfloat_v4sf
13251 = build_function_type_list (void_type_node,
13252 pfloat_type_node, V4SF_type_node, NULL_TREE);
13253 tree void_ftype_pdi_di
13254 = build_function_type_list (void_type_node,
13255 pdi_type_node, long_long_unsigned_type_node,
13257 tree void_ftype_pv2di_v2di
13258 = build_function_type_list (void_type_node,
13259 pv2di_type_node, V2DI_type_node, NULL_TREE);
13260 /* Normal vector unops. */
13261 tree v4sf_ftype_v4sf
13262 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13264 /* Normal vector binops. */
13265 tree v4sf_ftype_v4sf_v4sf
13266 = build_function_type_list (V4SF_type_node,
13267 V4SF_type_node, V4SF_type_node, NULL_TREE);
13268 tree v8qi_ftype_v8qi_v8qi
13269 = build_function_type_list (V8QI_type_node,
13270 V8QI_type_node, V8QI_type_node, NULL_TREE);
13271 tree v4hi_ftype_v4hi_v4hi
13272 = build_function_type_list (V4HI_type_node,
13273 V4HI_type_node, V4HI_type_node, NULL_TREE);
13274 tree v2si_ftype_v2si_v2si
13275 = build_function_type_list (V2SI_type_node,
13276 V2SI_type_node, V2SI_type_node, NULL_TREE);
13277 tree di_ftype_di_di
13278 = build_function_type_list (long_long_unsigned_type_node,
13279 long_long_unsigned_type_node,
13280 long_long_unsigned_type_node, NULL_TREE);
13282 tree v2si_ftype_v2sf
13283 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13284 tree v2sf_ftype_v2si
13285 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13286 tree v2si_ftype_v2si
13287 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13288 tree v2sf_ftype_v2sf
13289 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13290 tree v2sf_ftype_v2sf_v2sf
13291 = build_function_type_list (V2SF_type_node,
13292 V2SF_type_node, V2SF_type_node, NULL_TREE);
13293 tree v2si_ftype_v2sf_v2sf
13294 = build_function_type_list (V2SI_type_node,
13295 V2SF_type_node, V2SF_type_node, NULL_TREE);
13296 tree pint_type_node = build_pointer_type (integer_type_node);
13297 tree pcint_type_node = build_pointer_type (
13298 build_type_variant (integer_type_node, 1, 0));
13299 tree pdouble_type_node = build_pointer_type (double_type_node);
13300 tree pcdouble_type_node = build_pointer_type (
13301 build_type_variant (double_type_node, 1, 0));
13302 tree int_ftype_v2df_v2df
13303 = build_function_type_list (integer_type_node,
13304 V2DF_type_node, V2DF_type_node, NULL_TREE);
13307 = build_function_type (intTI_type_node, void_list_node);
13308 tree v2di_ftype_void
13309 = build_function_type (V2DI_type_node, void_list_node);
13310 tree ti_ftype_ti_ti
13311 = build_function_type_list (intTI_type_node,
13312 intTI_type_node, intTI_type_node, NULL_TREE);
13313 tree void_ftype_pcvoid
13314 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13316 = build_function_type_list (V2DI_type_node,
13317 long_long_unsigned_type_node, NULL_TREE);
13319 = build_function_type_list (long_long_unsigned_type_node,
13320 V2DI_type_node, NULL_TREE);
13321 tree v4sf_ftype_v4si
13322 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13323 tree v4si_ftype_v4sf
13324 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13325 tree v2df_ftype_v4si
13326 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13327 tree v4si_ftype_v2df
13328 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13329 tree v2si_ftype_v2df
13330 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13331 tree v4sf_ftype_v2df
13332 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13333 tree v2df_ftype_v2si
13334 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13335 tree v2df_ftype_v4sf
13336 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13337 tree int_ftype_v2df
13338 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13339 tree int64_ftype_v2df
13340 = build_function_type_list (long_long_integer_type_node,
13341 V2DF_type_node, NULL_TREE);
13342 tree v2df_ftype_v2df_int
13343 = build_function_type_list (V2DF_type_node,
13344 V2DF_type_node, integer_type_node, NULL_TREE);
13345 tree v2df_ftype_v2df_int64
13346 = build_function_type_list (V2DF_type_node,
13347 V2DF_type_node, long_long_integer_type_node,
13349 tree v4sf_ftype_v4sf_v2df
13350 = build_function_type_list (V4SF_type_node,
13351 V4SF_type_node, V2DF_type_node, NULL_TREE);
13352 tree v2df_ftype_v2df_v4sf
13353 = build_function_type_list (V2DF_type_node,
13354 V2DF_type_node, V4SF_type_node, NULL_TREE);
13355 tree v2df_ftype_v2df_v2df_int
13356 = build_function_type_list (V2DF_type_node,
13357 V2DF_type_node, V2DF_type_node,
13360 tree v2df_ftype_v2df_pv2si
13361 = build_function_type_list (V2DF_type_node,
13362 V2DF_type_node, pv2si_type_node, NULL_TREE);
13363 tree void_ftype_pv2si_v2df
13364 = build_function_type_list (void_type_node,
13365 pv2si_type_node, V2DF_type_node, NULL_TREE);
13366 tree void_ftype_pdouble_v2df
13367 = build_function_type_list (void_type_node,
13368 pdouble_type_node, V2DF_type_node, NULL_TREE);
13369 tree void_ftype_pint_int
13370 = build_function_type_list (void_type_node,
13371 pint_type_node, integer_type_node, NULL_TREE);
13372 tree void_ftype_v16qi_v16qi_pchar
13373 = build_function_type_list (void_type_node,
13374 V16QI_type_node, V16QI_type_node,
13375 pchar_type_node, NULL_TREE);
13376 tree v2df_ftype_pcdouble
13377 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13378 tree v2df_ftype_v2df_v2df
13379 = build_function_type_list (V2DF_type_node,
13380 V2DF_type_node, V2DF_type_node, NULL_TREE);
13381 tree v16qi_ftype_v16qi_v16qi
13382 = build_function_type_list (V16QI_type_node,
13383 V16QI_type_node, V16QI_type_node, NULL_TREE);
13384 tree v8hi_ftype_v8hi_v8hi
13385 = build_function_type_list (V8HI_type_node,
13386 V8HI_type_node, V8HI_type_node, NULL_TREE);
13387 tree v4si_ftype_v4si_v4si
13388 = build_function_type_list (V4SI_type_node,
13389 V4SI_type_node, V4SI_type_node, NULL_TREE);
13390 tree v2di_ftype_v2di_v2di
13391 = build_function_type_list (V2DI_type_node,
13392 V2DI_type_node, V2DI_type_node, NULL_TREE);
13393 tree v2di_ftype_v2df_v2df
13394 = build_function_type_list (V2DI_type_node,
13395 V2DF_type_node, V2DF_type_node, NULL_TREE);
13396 tree v2df_ftype_v2df
13397 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13398 tree v2df_ftype_double
13399 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13400 tree v2df_ftype_double_double
13401 = build_function_type_list (V2DF_type_node,
13402 double_type_node, double_type_node, NULL_TREE);
13403 tree int_ftype_v8hi_int
13404 = build_function_type_list (integer_type_node,
13405 V8HI_type_node, integer_type_node, NULL_TREE);
13406 tree v8hi_ftype_v8hi_int_int
13407 = build_function_type_list (V8HI_type_node,
13408 V8HI_type_node, integer_type_node,
13409 integer_type_node, NULL_TREE);
13410 tree v2di_ftype_v2di_int
13411 = build_function_type_list (V2DI_type_node,
13412 V2DI_type_node, integer_type_node, NULL_TREE);
13413 tree v4si_ftype_v4si_int
13414 = build_function_type_list (V4SI_type_node,
13415 V4SI_type_node, integer_type_node, NULL_TREE);
13416 tree v8hi_ftype_v8hi_int
13417 = build_function_type_list (V8HI_type_node,
13418 V8HI_type_node, integer_type_node, NULL_TREE);
13419 tree v8hi_ftype_v8hi_v2di
13420 = build_function_type_list (V8HI_type_node,
13421 V8HI_type_node, V2DI_type_node, NULL_TREE);
13422 tree v4si_ftype_v4si_v2di
13423 = build_function_type_list (V4SI_type_node,
13424 V4SI_type_node, V2DI_type_node, NULL_TREE);
13425 tree v4si_ftype_v8hi_v8hi
13426 = build_function_type_list (V4SI_type_node,
13427 V8HI_type_node, V8HI_type_node, NULL_TREE);
13428 tree di_ftype_v8qi_v8qi
13429 = build_function_type_list (long_long_unsigned_type_node,
13430 V8QI_type_node, V8QI_type_node, NULL_TREE);
13431 tree v2di_ftype_v16qi_v16qi
13432 = build_function_type_list (V2DI_type_node,
13433 V16QI_type_node, V16QI_type_node, NULL_TREE);
13434 tree int_ftype_v16qi
13435 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13436 tree v16qi_ftype_pcchar
13437 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13438 tree void_ftype_pchar_v16qi
13439 = build_function_type_list (void_type_node,
13440 pchar_type_node, V16QI_type_node, NULL_TREE);
13441 tree v4si_ftype_pcint
13442 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13443 tree void_ftype_pcint_v4si
13444 = build_function_type_list (void_type_node,
13445 pcint_type_node, V4SI_type_node, NULL_TREE);
13446 tree v2di_ftype_v2di
13447 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13450 tree float128_type;
13452 /* The __float80 type. */
13453 if (TYPE_MODE (long_double_type_node) == XFmode)
13454 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13458 /* The __float80 type. */
13459 float80_type = make_node (REAL_TYPE);
13460 TYPE_PRECISION (float80_type) = 96;
13461 layout_type (float80_type);
13462 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13465 float128_type = make_node (REAL_TYPE);
13466 TYPE_PRECISION (float128_type) = 128;
13467 layout_type (float128_type);
13468 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13470 /* Add all builtins that are more or less simple operations on two
13472 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13474 /* Use one of the operands; the target can have a different mode for
13475 mask-generating compares. */
13476 enum machine_mode mode;
13481 mode = insn_data[d->icode].operand[1].mode;
13486 type = v16qi_ftype_v16qi_v16qi;
13489 type = v8hi_ftype_v8hi_v8hi;
13492 type = v4si_ftype_v4si_v4si;
13495 type = v2di_ftype_v2di_v2di;
13498 type = v2df_ftype_v2df_v2df;
13501 type = ti_ftype_ti_ti;
13504 type = v4sf_ftype_v4sf_v4sf;
13507 type = v8qi_ftype_v8qi_v8qi;
13510 type = v4hi_ftype_v4hi_v4hi;
13513 type = v2si_ftype_v2si_v2si;
13516 type = di_ftype_di_di;
13523 /* Override for comparisons. */
13524 if (d->icode == CODE_FOR_maskcmpv4sf3
13525 || d->icode == CODE_FOR_maskncmpv4sf3
13526 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13527 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13528 type = v4si_ftype_v4sf_v4sf;
13530 if (d->icode == CODE_FOR_maskcmpv2df3
13531 || d->icode == CODE_FOR_maskncmpv2df3
13532 || d->icode == CODE_FOR_vmmaskcmpv2df3
13533 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13534 type = v2di_ftype_v2df_v2df;
13536 def_builtin (d->mask, d->name, type, d->code);
13539 /* Add the remaining MMX insns with somewhat more complicated types. */
13540 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13541 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13542 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13543 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13544 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13546 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13547 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13548 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13550 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13551 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13553 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13554 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13556 /* comi/ucomi insns. */
13557 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13558 if (d->mask == MASK_SSE2)
13559 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13561 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13563 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13564 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13565 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13567 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13568 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13569 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13570 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13571 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13572 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13573 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13574 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13575 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13576 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13577 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13579 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13580 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13582 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13584 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13585 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13586 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13587 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13588 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13589 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13591 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13592 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13593 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13594 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13596 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13597 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13598 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13599 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13601 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13603 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13605 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13606 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13607 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13608 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13609 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13610 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13612 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13614 /* Original 3DNow! */
13615 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13616 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13617 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13618 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13619 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13620 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13621 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13622 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13623 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13624 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13625 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13626 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13633 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13634 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13636 /* 3DNow! extension as used in the Athlon CPU. */
13637 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13638 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13639 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13640 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13641 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13642 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13644 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13647 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13648 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13695 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13696 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13703 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13704 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13705 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13708 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13709 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13716 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13721 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13727 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13734 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13735 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13740 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13750 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13751 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13753 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13755 /* Prescott New Instructions. */
13756 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13757 void_ftype_pcvoid_unsigned_unsigned,
13758 IX86_BUILTIN_MONITOR);
13759 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13760 void_ftype_unsigned_unsigned,
13761 IX86_BUILTIN_MWAIT);
13762 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13764 IX86_BUILTIN_MOVSHDUP);
13765 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13767 IX86_BUILTIN_MOVSLDUP);
13768 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13769 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13770 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13771 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13772 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13773 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13776 /* Errors in the source file can cause expand_expr to return const0_rtx
13777 where we expect a vector. To avoid crashing, use one of the vector
13778 clear instructions. */
13780 safe_vector_operand (rtx x, enum machine_mode mode)
13782 if (x != const0_rtx)
/* NOTE(review): the intervening line is not visible in this listing;
   presumably X is returned unchanged here when it is already a real
   vector operand -- confirm against the full source.  */
/* X was const0_rtx: materialize a fresh pseudo of MODE and clear it
   with the matching vector-clear pattern.  */
13784 x = gen_reg_rtx (mode);
/* MMX/3DNow! modes are cleared through the DImode clrdi pattern;
   reach the register via a DImode SUBREG unless MODE is DImode.  */
13786 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13787 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13788 : gen_rtx_SUBREG (DImode, x, 0)));
/* All other (SSE) vector modes are cleared via the V4SFmode pattern,
   again through a SUBREG when MODE differs from V4SFmode.  */
13790 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13791 : gen_rtx_SUBREG (V4SFmode, x, 0),
13792 CONST0_RTX (V4SFmode)));
13796 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13799 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
/* Pull the two argument trees off ARGLIST and expand them to RTL.  */
13802 tree arg0 = TREE_VALUE (arglist);
13803 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13804 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13805 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Operand modes come from the insn's operand table: operand 0 is the
   result, operands 1 and 2 are the inputs.  */
13806 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13807 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13808 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery const0_rtx vector operands (see
   safe_vector_operand).  */
13810 if (VECTOR_MODE_P (mode0))
13811 op0 = safe_vector_operand (op0, mode0);
13812 if (VECTOR_MODE_P (mode1))
13813 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only if it has the right mode and satisfies the result
   operand's predicate; otherwise allocate a fresh pseudo.  */
13816 || GET_MODE (target) != tmode
13817 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13818 target = gen_reg_rtx (tmode);
/* An SImode value where the insn wants TImode (SSE2 shift counts):
   load it into a V4SI register with sse2_loadd and take the TImode
   low part.  */
13820 if (GET_MODE (op1) == SImode && mode1 == TImode)
13822 rtx x = gen_reg_rtx (V4SImode);
13823 emit_insn (gen_sse2_loadd (x, op1));
13824 op1 = gen_lowpart (TImode, x);
13827 /* In case the insn wants input operands in modes different from
13828 the result, abort. */
13829 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13830 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
/* Force each input into a register of the required mode when it does
   not already satisfy the insn's operand predicate.  */
13833 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13834 op0 = copy_to_mode_reg (mode0, op0);
13835 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13836 op1 = copy_to_mode_reg (mode1, op1);
13838 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13839 yet one of the two must not be a memory. This is normally enforced
13840 by expanders, but we didn't bother to create one here. */
13841 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13842 op0 = copy_to_mode_reg (mode0, op0);
/* Emit the insn; the tail (failure check / emit / return TARGET) is
   not visible in this listing.  */
13844 pat = GEN_FCN (icode) (target, op0, op1);
13851 /* Subroutine of ix86_expand_builtin to take care of stores. */
13854 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
/* ARG0 is the destination address, ARG1 the value to store.  */
13857 tree arg0 = TREE_VALUE (arglist);
13858 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13859 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13860 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13861 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13862 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Protect against error-recovery const0_rtx vector values.  */
13864 if (VECTOR_MODE_P (mode1))
13865 op1 = safe_vector_operand (op1, mode1);
/* Turn the address into a MEM of the store's mode, and force the
   stored value into a register of its required mode.  */
13867 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13868 op1 = copy_to_mode_reg (mode1, op1);
/* Emit the store; the tail of this function is elided in this listing.  */
13870 pat = GEN_FCN (icode) (op0, op1);
13876 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13879 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13880 rtx target, int do_load)
13883 tree arg0 = TREE_VALUE (arglist);
13884 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13885 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13886 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only when its mode and predicate fit the result
   operand; otherwise allocate a new pseudo.  */
13889 || GET_MODE (target) != tmode
13890 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13891 target = gen_reg_rtx (tmode);
/* When DO_LOAD is set, OP0 is an address: wrap it in a MEM so the
   insn performs the load itself.  */
13893 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13896 if (VECTOR_MODE_P (mode0))
13897 op0 = safe_vector_operand (op0, mode0);
/* Force the input into a register if the predicate rejects it.  */
13899 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13900 op0 = copy_to_mode_reg (mode0, op0);
/* Emit the insn; the remainder of the function is elided here.  */
13903 pat = GEN_FCN (icode) (target, op0);
13910 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13911 sqrtss, rsqrtss, rcpss. */
13914 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13917 tree arg0 = TREE_VALUE (arglist);
13918 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13919 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13920 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only when its mode and predicate match the result
   operand; otherwise use a fresh pseudo.  */
13923 || GET_MODE (target) != tmode
13924 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13925 target = gen_reg_rtx (tmode);
13927 if (VECTOR_MODE_P (mode0))
13928 op0 = safe_vector_operand (op0, mode0);
13930 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13931 op0 = copy_to_mode_reg (mode0, op0);
/* NOTE(review): OP1's initialization is elided from this listing --
   presumably it is derived from OP0, since these scalar insns take the
   same value for both inputs; confirm against the full source.  Both
   operands share MODE0.  */
13934 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13935 op1 = copy_to_mode_reg (mode0, op1);
/* Emit the insn; the tail of the function is elided here.  */
13937 pat = GEN_FCN (icode) (target, op0, op1);
13944 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13947 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13951 tree arg0 = TREE_VALUE (arglist);
13952 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13953 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13954 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Modes and the comparison code come from the builtin description D.  */
13956 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13957 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13958 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13959 enum rtx_code comparison = d->comparison;
13961 if (VECTOR_MODE_P (mode0))
13962 op0 = safe_vector_operand (op0, mode0);
13963 if (VECTOR_MODE_P (mode1))
13964 op1 = safe_vector_operand (op1, mode1);
13966 /* Swap operands if we have a comparison that isn't available in
/* NOTE(review): the swap branch is partially elided here; it copies
   OP1 into a fresh register before the operands are exchanged --
   confirm the exchange against the full source.  */
13970 rtx tmp = gen_reg_rtx (mode1);
13971 emit_move_insn (tmp, op1);
/* Reuse TARGET only when its mode and predicate fit the result
   operand; otherwise allocate a new pseudo.  */
13977 || GET_MODE (target) != tmode
13978 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13979 target = gen_reg_rtx (tmode);
13981 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13982 op0 = copy_to_mode_reg (mode0, op0);
13983 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13984 op1 = copy_to_mode_reg (mode1, op1);
/* OP2 encodes the comparison itself as an rtx; the mask-compare
   patterns take it as a third input.  */
13986 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13987 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13994 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13997 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14001 tree arg0 = TREE_VALUE (arglist);
14002 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14003 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14004 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* For comi/ucomi the two compared values are operands 0 and 1; the
   result is the condition-code register, not an insn operand.  */
14006 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14007 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14008 enum rtx_code comparison = d->comparison;
14010 if (VECTOR_MODE_P (mode0))
14011 op0 = safe_vector_operand (op0, mode0);
14012 if (VECTOR_MODE_P (mode1))
14013 op1 = safe_vector_operand (op1, mode1);
14015 /* Swap operands if we have a comparison that isn't available in
/* Zero an SImode pseudo, then view its low byte as QImode TARGET so
   the setcc below only writes the bottom byte.  */
14024 target = gen_reg_rtx (SImode);
14025 emit_move_insn (target, const0_rtx);
14026 target = gen_rtx_SUBREG (QImode, target, 0);
14028 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14029 op0 = copy_to_mode_reg (mode0, op0);
14030 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14031 op1 = copy_to_mode_reg (mode1, op1);
14033 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
/* Emit the compare itself (sets the flags)...  */
14034 pat = GEN_FCN (d->icode) (op0, op1);
/* ... then a setcc into the low byte of TARGET via STRICT_LOW_PART,
   leaving the upper bytes zero.  */
14038 emit_insn (gen_rtx_SET (VOIDmode,
14039 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14040 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register underlying the QImode SUBREG.  */
14044 return SUBREG_REG (target);
14047 /* Expand an expression EXP that calls a built-in function,
14048 with result going to TARGET if that's convenient
14049 (and in mode MODE if that's convenient).
14050 SUBTARGET may be used as the target for computing one of EXP's operands.
14051 IGNORE is nonzero if the value is to be ignored. */
14054 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14055 enum machine_mode mode ATTRIBUTE_UNUSED,
14056 int ignore ATTRIBUTE_UNUSED)
/* d walks the generated builtin-description tables at the bottom;
   fcode selects the switch case for the builtin being expanded.  */
14058 const struct builtin_description *d;
14060 enum insn_code icode;
14061 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14062 tree arglist = TREE_OPERAND (exp, 1);
14063 tree arg0, arg1, arg2;
14064 rtx op0, op1, op2, pat;
14065 enum machine_mode tmode, mode0, mode1, mode2;
14066 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* Builtins that need irregular operand handling are expanded case by
   case below; everything regular falls through to the bdesc_2arg /
   bdesc_1arg / bdesc_comi table scans after the switch.  */
14070 case IX86_BUILTIN_EMMS:
14071 emit_insn (gen_emms ());
14074 case IX86_BUILTIN_SFENCE:
14075 emit_insn (gen_sfence ());
/* pextrw: operand 2 must be an immediate selector; the valid range
   depends on vector width (0..3 for 64-bit MMX, 0..7 for SSE2).  */
14078 case IX86_BUILTIN_PEXTRW:
14079 case IX86_BUILTIN_PEXTRW128:
14080 icode = (fcode == IX86_BUILTIN_PEXTRW
14081 ? CODE_FOR_mmx_pextrw
14082 : CODE_FOR_sse2_pextrw);
14083 arg0 = TREE_VALUE (arglist);
14084 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14085 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14086 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14087 tmode = insn_data[icode].operand[0].mode;
14088 mode0 = insn_data[icode].operand[1].mode;
14089 mode1 = insn_data[icode].operand[2].mode;
14091 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14092 op0 = copy_to_mode_reg (mode0, op0);
14093 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
/* Non-immediate selector: diagnose and return a dummy register so
   expansion can continue after the error.  */
14095 error ("selector must be an integer constant in the range 0..%i",
14096 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14097 return gen_reg_rtx (tmode);
14100 || GET_MODE (target) != tmode
14101 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14102 target = gen_reg_rtx (tmode);
14103 pat = GEN_FCN (icode) (target, op0, op1);
/* pinsrw: like pextrw but three operands; selector range is 0..15 /
   0..255 per the error text below -- NOTE(review): range text differs
   from pextrw's, confirm against the insn patterns.  */
14109 case IX86_BUILTIN_PINSRW:
14110 case IX86_BUILTIN_PINSRW128:
14111 icode = (fcode == IX86_BUILTIN_PINSRW
14112 ? CODE_FOR_mmx_pinsrw
14113 : CODE_FOR_sse2_pinsrw);
14114 arg0 = TREE_VALUE (arglist);
14115 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14116 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14117 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14118 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14119 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14120 tmode = insn_data[icode].operand[0].mode;
14121 mode0 = insn_data[icode].operand[1].mode;
14122 mode1 = insn_data[icode].operand[2].mode;
14123 mode2 = insn_data[icode].operand[3].mode;
14125 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14126 op0 = copy_to_mode_reg (mode0, op0);
14127 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14128 op1 = copy_to_mode_reg (mode1, op1);
14129 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14131 error ("selector must be an integer constant in the range 0..%i",
14132 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14136 || GET_MODE (target) != tmode
14137 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14138 target = gen_reg_rtx (tmode);
14139 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* maskmovq/maskmovdqu store through an implicit pointer operand, so
   a 64-bit-specific pattern is selected when TARGET_64BIT.  */
14145 case IX86_BUILTIN_MASKMOVQ:
14146 case IX86_BUILTIN_MASKMOVDQU:
14147 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14148 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14149 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14150 : CODE_FOR_sse2_maskmovdqu));
14151 /* Note the arg order is different from the operand order. */
14152 arg1 = TREE_VALUE (arglist);
14153 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14154 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14155 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14156 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14157 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14158 mode0 = insn_data[icode].operand[0].mode;
14159 mode1 = insn_data[icode].operand[1].mode;
14160 mode2 = insn_data[icode].operand[2].mode;
14162 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14163 op0 = copy_to_mode_reg (mode0, op0);
14164 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14165 op1 = copy_to_mode_reg (mode1, op1);
14166 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14167 op2 = copy_to_mode_reg (mode2, op2);
14168 pat = GEN_FCN (icode) (op0, op1, op2);
/* Scalar SSE unary ops and simple load/store builtins delegate to the
   shared expander helpers.  */
14174 case IX86_BUILTIN_SQRTSS:
14175 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14176 case IX86_BUILTIN_RSQRTSS:
14177 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14178 case IX86_BUILTIN_RCPSS:
14179 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14181 case IX86_BUILTIN_LOADAPS:
14182 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14184 case IX86_BUILTIN_LOADUPS:
14185 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14187 case IX86_BUILTIN_STOREAPS:
14188 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14190 case IX86_BUILTIN_STOREUPS:
14191 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14193 case IX86_BUILTIN_LOADSS:
14194 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14196 case IX86_BUILTIN_STORESS:
14197 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
/* movhps/movlps-style partial loads: arg1 is a pointer, wrapped in a
   MEM below; arg0 supplies the untouched half of the vector.  */
14199 case IX86_BUILTIN_LOADHPS:
14200 case IX86_BUILTIN_LOADLPS:
14201 case IX86_BUILTIN_LOADHPD:
14202 case IX86_BUILTIN_LOADLPD:
14203 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14204 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14205 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14206 : CODE_FOR_sse2_movsd);
14207 arg0 = TREE_VALUE (arglist);
14208 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14209 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14210 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14211 tmode = insn_data[icode].operand[0].mode;
14212 mode0 = insn_data[icode].operand[1].mode;
14213 mode1 = insn_data[icode].operand[2].mode;
14215 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14216 op0 = copy_to_mode_reg (mode0, op0);
14217 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14219 || GET_MODE (target) != tmode
14220 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14221 target = gen_reg_rtx (tmode);
14222 pat = GEN_FCN (icode) (target, op0, op1);
/* Partial stores: the memory destination doubles as operand 0 and 1
   of the move pattern (the untouched half reads back from memory).  */
14228 case IX86_BUILTIN_STOREHPS:
14229 case IX86_BUILTIN_STORELPS:
14230 case IX86_BUILTIN_STOREHPD:
14231 case IX86_BUILTIN_STORELPD:
14232 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14233 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14234 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14235 : CODE_FOR_sse2_movsd);
14236 arg0 = TREE_VALUE (arglist);
14237 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14238 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14239 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14240 mode0 = insn_data[icode].operand[1].mode;
14241 mode1 = insn_data[icode].operand[2].mode;
14243 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14244 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14245 op1 = copy_to_mode_reg (mode1, op1);
14247 pat = GEN_FCN (icode) (op0, op0, op1);
14253 case IX86_BUILTIN_MOVNTPS:
14254 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14255 case IX86_BUILTIN_MOVNTQ:
14256 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
/* ldmxcsr/stmxcsr go through a dedicated stack slot since the insns
   only take memory operands.  */
14258 case IX86_BUILTIN_LDMXCSR:
14259 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14260 target = assign_386_stack_local (SImode, 0);
14261 emit_move_insn (target, op0);
14262 emit_insn (gen_ldmxcsr (target));
14265 case IX86_BUILTIN_STMXCSR:
14266 target = assign_386_stack_local (SImode, 0);
14267 emit_insn (gen_stmxcsr (target));
14268 return copy_to_mode_reg (SImode, target);
/* shufps/shufpd: third operand is an immediate shuffle mask.  */
14270 case IX86_BUILTIN_SHUFPS:
14271 case IX86_BUILTIN_SHUFPD:
14272 icode = (fcode == IX86_BUILTIN_SHUFPS
14273 ? CODE_FOR_sse_shufps
14274 : CODE_FOR_sse2_shufpd);
14275 arg0 = TREE_VALUE (arglist);
14276 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14277 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14278 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14279 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14280 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14281 tmode = insn_data[icode].operand[0].mode;
14282 mode0 = insn_data[icode].operand[1].mode;
14283 mode1 = insn_data[icode].operand[2].mode;
14284 mode2 = insn_data[icode].operand[3].mode;
14286 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14287 op0 = copy_to_mode_reg (mode0, op0);
14288 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14289 op1 = copy_to_mode_reg (mode1, op1);
14290 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14292 /* @@@ better error message */
14293 error ("mask must be an immediate");
14294 return gen_reg_rtx (tmode);
14297 || GET_MODE (target) != tmode
14298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14299 target = gen_reg_rtx (tmode);
14300 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* pshufw/pshufd/pshufhw/pshuflw: unary shuffle with immediate mask.  */
14306 case IX86_BUILTIN_PSHUFW:
14307 case IX86_BUILTIN_PSHUFD:
14308 case IX86_BUILTIN_PSHUFHW:
14309 case IX86_BUILTIN_PSHUFLW:
14310 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14311 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14312 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14313 : CODE_FOR_mmx_pshufw);
14314 arg0 = TREE_VALUE (arglist);
14315 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14316 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14317 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14318 tmode = insn_data[icode].operand[0].mode;
14319 mode1 = insn_data[icode].operand[1].mode;
14320 mode2 = insn_data[icode].operand[2].mode;
14322 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14323 op0 = copy_to_mode_reg (mode1, op0);
14324 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14326 /* @@@ better error message */
14327 error ("mask must be an immediate")
14331 || GET_MODE (target) != tmode
14332 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14333 target = gen_reg_rtx (tmode);
14334 pat = GEN_FCN (icode) (target, op0, op1);
/* pslldq/psrldq: whole-register byte shift; the TImode pattern is
   driven through a V2DImode subreg.  */
14340 case IX86_BUILTIN_PSLLDQI128:
14341 case IX86_BUILTIN_PSRLDQI128:
14342 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14343 : CODE_FOR_sse2_lshrti3);
14344 arg0 = TREE_VALUE (arglist);
14345 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14346 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14347 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14348 tmode = insn_data[icode].operand[0].mode;
14349 mode1 = insn_data[icode].operand[1].mode;
14350 mode2 = insn_data[icode].operand[2].mode;
14352 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14354 op0 = copy_to_reg (op0);
14355 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14357 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14359 error ("shift must be an immediate");
14362 target = gen_reg_rtx (V2DImode);
14363 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
/* 3DNow! builtins all map one-to-one onto named patterns.  */
14369 case IX86_BUILTIN_FEMMS:
14370 emit_insn (gen_femms ());
14373 case IX86_BUILTIN_PAVGUSB:
14374 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14376 case IX86_BUILTIN_PF2ID:
14377 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14379 case IX86_BUILTIN_PFACC:
14380 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14382 case IX86_BUILTIN_PFADD:
14383 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14385 case IX86_BUILTIN_PFCMPEQ:
14386 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14388 case IX86_BUILTIN_PFCMPGE:
14389 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14391 case IX86_BUILTIN_PFCMPGT:
14392 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14394 case IX86_BUILTIN_PFMAX:
14395 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14397 case IX86_BUILTIN_PFMIN:
14398 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14400 case IX86_BUILTIN_PFMUL:
14401 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14403 case IX86_BUILTIN_PFRCP:
14404 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14406 case IX86_BUILTIN_PFRCPIT1:
14407 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14409 case IX86_BUILTIN_PFRCPIT2:
14410 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14412 case IX86_BUILTIN_PFRSQIT1:
14413 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14415 case IX86_BUILTIN_PFRSQRT:
14416 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14418 case IX86_BUILTIN_PFSUB:
14419 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14421 case IX86_BUILTIN_PFSUBR:
14422 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14424 case IX86_BUILTIN_PI2FD:
14425 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14427 case IX86_BUILTIN_PMULHRW:
14428 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14430 case IX86_BUILTIN_PF2IW:
14431 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14433 case IX86_BUILTIN_PFNACC:
14434 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14436 case IX86_BUILTIN_PFPNACC:
14437 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14439 case IX86_BUILTIN_PI2FW:
14440 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14442 case IX86_BUILTIN_PSWAPDSI:
14443 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14445 case IX86_BUILTIN_PSWAPDSF:
14446 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
/* Register-clearing builtins expand to the dedicated clr patterns.  */
14448 case IX86_BUILTIN_SSE_ZERO:
14449 target = gen_reg_rtx (V4SFmode);
14450 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14453 case IX86_BUILTIN_MMX_ZERO:
14454 target = gen_reg_rtx (DImode);
14455 emit_insn (gen_mmx_clrdi (target));
14458 case IX86_BUILTIN_CLRTI:
14459 target = gen_reg_rtx (V2DImode);
14460 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14464 case IX86_BUILTIN_SQRTSD:
14465 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14466 case IX86_BUILTIN_LOADAPD:
14467 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14468 case IX86_BUILTIN_LOADUPD:
14469 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14471 case IX86_BUILTIN_STOREAPD:
14472 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14473 case IX86_BUILTIN_STOREUPD:
14474 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14476 case IX86_BUILTIN_LOADSD:
14477 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14479 case IX86_BUILTIN_STORESD:
14480 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
/* _mm_set1_pd: spill the scalar to a stack slot, load it into the low
   element, then duplicate into the high element with shufpd(0).  */
14482 case IX86_BUILTIN_SETPD1:
14483 target = assign_386_stack_local (DFmode, 0);
14484 arg0 = TREE_VALUE (arglist);
14485 emit_move_insn (adjust_address (target, DFmode, 0),
14486 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14487 op0 = gen_reg_rtx (V2DFmode);
14488 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14489 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
/* _mm_set_pd: build the vector in a stack slot, then movapd it in.  */
14492 case IX86_BUILTIN_SETPD:
14493 target = assign_386_stack_local (V2DFmode, 0);
14494 arg0 = TREE_VALUE (arglist);
14495 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14496 emit_move_insn (adjust_address (target, DFmode, 0),
14497 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14498 emit_move_insn (adjust_address (target, DFmode, 8),
14499 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14500 op0 = gen_reg_rtx (V2DFmode);
14501 emit_insn (gen_sse2_movapd (op0, target));
/* _mm_loadr_pd: aligned load followed by an element swap (shufpd 1).  */
14504 case IX86_BUILTIN_LOADRPD:
14505 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14506 gen_reg_rtx (V2DFmode), 1);
14507 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14510 case IX86_BUILTIN_LOADPD1:
14511 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14512 gen_reg_rtx (V2DFmode), 1);
14513 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14516 case IX86_BUILTIN_STOREPD1:
14517 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14518 case IX86_BUILTIN_STORERPD:
14519 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14521 case IX86_BUILTIN_CLRPD:
14522 target = gen_reg_rtx (V2DFmode);
14523 emit_insn (gen_sse_clrv2df (target));
14526 case IX86_BUILTIN_MFENCE:
14527 emit_insn (gen_sse2_mfence ());
14529 case IX86_BUILTIN_LFENCE:
14530 emit_insn (gen_sse2_lfence ());
/* clflush takes a raw address operand; force it into a register if
   the pattern's predicate rejects it.  */
14533 case IX86_BUILTIN_CLFLUSH:
14534 arg0 = TREE_VALUE (arglist);
14535 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14536 icode = CODE_FOR_sse2_clflush;
14537 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14538 op0 = copy_to_mode_reg (Pmode, op0);
14540 emit_insn (gen_sse2_clflush (op0));
14543 case IX86_BUILTIN_MOVNTPD:
14544 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14545 case IX86_BUILTIN_MOVNTDQ:
14546 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14547 case IX86_BUILTIN_MOVNTI:
14548 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14550 case IX86_BUILTIN_LOADDQA:
14551 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14552 case IX86_BUILTIN_LOADDQU:
14553 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14554 case IX86_BUILTIN_LOADD:
14555 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14557 case IX86_BUILTIN_STOREDQA:
14558 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14559 case IX86_BUILTIN_STOREDQU:
14560 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14561 case IX86_BUILTIN_STORED:
14562 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
/* SSE3 monitor/mwait: operands are plain SImode register values.  */
14564 case IX86_BUILTIN_MONITOR:
14565 arg0 = TREE_VALUE (arglist);
14566 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14567 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14568 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14569 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14570 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14572 op0 = copy_to_mode_reg (SImode, op0);
14574 op1 = copy_to_mode_reg (SImode, op1);
14576 op2 = copy_to_mode_reg (SImode, op2);
14577 emit_insn (gen_monitor (op0, op1, op2));
14580 case IX86_BUILTIN_MWAIT:
14581 arg0 = TREE_VALUE (arglist);
14582 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14583 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14584 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14586 op0 = copy_to_mode_reg (SImode, op0);
14588 op1 = copy_to_mode_reg (SImode, op1);
14589 emit_insn (gen_mwait (op0, op1));
14592 case IX86_BUILTIN_LOADDDUP:
14593 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14595 case IX86_BUILTIN_LDDQU:
14596 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
/* Anything not special-cased above is looked up in the generated
   tables: binary ops (with SSE compares routed to the dedicated
   compare expander), then unary ops, then comi-style compares.  */
14603 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14604 if (d->code == fcode)
14606 /* Compares are treated specially. */
14607 if (d->icode == CODE_FOR_maskcmpv4sf3
14608 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14609 || d->icode == CODE_FOR_maskncmpv4sf3
14610 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14611 || d->icode == CODE_FOR_maskcmpv2df3
14612 || d->icode == CODE_FOR_vmmaskcmpv2df3
14613 || d->icode == CODE_FOR_maskncmpv2df3
14614 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14615 return ix86_expand_sse_compare (d, arglist, target);
14617 return ix86_expand_binop_builtin (d->icode, arglist, target);
14620 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14621 if (d->code == fcode)
14622 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14624 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14625 if (d->code == fcode)
14626 return ix86_expand_sse_comi (d, arglist, target);
14628 /* @@@ Should really do something sensible here. */
14632 /* Store OPERAND to the memory after reload is completed. This means
14633 that we can't easily use assign_stack_local.
14634 Returns a MEM rtx addressing the stored value; the caller releases
14635 the slot with ix86_free_from_memory.  Only valid after reload. */
14635 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14638 if (!reload_completed)
/* With a red zone (x86-64 ABI) we can store below the stack pointer
   without adjusting it.  */
14640 if (TARGET_RED_ZONE)
14642 result = gen_rtx_MEM (mode,
14643 gen_rtx_PLUS (Pmode,
14645 GEN_INT (-RED_ZONE_SIZE)));
14646 emit_move_insn (result, operand);
/* No red zone on 64-bit: push the value (widened to DImode) and
   address it at the new stack pointer.  */
14648 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14654 operand = gen_lowpart (DImode, operand);
14658 gen_rtx_SET (VOIDmode,
14659 gen_rtx_MEM (DImode,
14660 gen_rtx_PRE_DEC (DImode,
14661 stack_pointer_rtx)),
14667 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: push the two SImode halves separately (high half
   first, so the value ends up little-endian contiguous).  */
14676 split_di (&operand, 1, operands, operands + 1);
14678 gen_rtx_SET (VOIDmode,
14679 gen_rtx_MEM (SImode,
14680 gen_rtx_PRE_DEC (Pmode,
14681 stack_pointer_rtx)),
14684 gen_rtx_SET (VOIDmode,
14685 gen_rtx_MEM (SImode,
14686 gen_rtx_PRE_DEC (Pmode,
14687 stack_pointer_rtx)),
14692 /* It is better to store HImodes as SImodes. */
14693 if (!TARGET_PARTIAL_REG_STALL)
14694 operand = gen_lowpart (SImode, operand);
14698 gen_rtx_SET (VOIDmode,
14699 gen_rtx_MEM (GET_MODE (operand),
14700 gen_rtx_PRE_DEC (SImode,
14701 stack_pointer_rtx)),
14707 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14712 /* Free operand from the memory.
14713 Counterpart of ix86_force_to_memory: pops the stack slot it pushed.
14714 A no-op when the red zone was used (no stack adjustment was made). */
14714 ix86_free_from_memory (enum machine_mode mode)
14716 if (!TARGET_RED_ZONE)
/* Slot size mirrors the push logic: 8 bytes for DImode or any 64-bit
   push, otherwise 4 (HImode was widened unless partial-reg stalls).  */
14720 if (mode == DImode || TARGET_64BIT)
14722 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14726 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14727 to pop or add instruction if registers are available. */
14728 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14729 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14734 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14735 QImode must go into class Q_REGS.
14736 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14737 movdf to do mem-to-mem moves through integer regs. */
14739 ix86_preferred_reload_class (rtx x, enum reg_class class)
/* Nonzero vector constants cannot be materialized in registers.  */
14741 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14743 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14745 /* SSE can't load any constant directly yet. */
14746 if (SSE_CLASS_P (class))
14748 /* Floats can load 0 and 1. */
14749 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14751 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14752 if (MAYBE_SSE_CLASS_P (class))
14753 return (reg_class_subset_p (class, GENERAL_REGS)
14754 ? GENERAL_REGS : FLOAT_REGS);
14758 /* General regs can load everything. */
14759 if (reg_class_subset_p (class, GENERAL_REGS))
14760 return GENERAL_REGS;
14761 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14762 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot hold constants either.  */
14765 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must live in a byte-addressable register class.  */
14767 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14772 /* If we are copying between general and FP registers, we need a memory
14773 location. The same is true for SSE and MMX registers.
14775 The macro can't work reliably when one of the CLASSES is class containing
14776 registers from multiple units (SSE, MMX, integer). We avoid this by never
14777 combining those units in single alternative in the machine description.
14778 Ensure that this constraint holds to avoid unexpected surprises.
14780 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14781 enforce these sanity checks. */
14783 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14784 enum machine_mode mode, int strict)
/* Sanity check: a class mixing units would make the unit tests below
   meaningless (see the comment above).  */
14786 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14787 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14788 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14789 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14790 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14791 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed for x87<->anything, and for SSE/MMX<->integer
   unless the mode fits a GPR and direct inter-unit moves are on.  */
14798 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14799 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14800 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14801 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14802 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14804 /* Return the cost of moving data from a register in class CLASS1 to
14805 one in class CLASS2.
14807 It is not required that the cost always equal 2 when FROM is the same as TO;
14808 on some machines it is expensive to move between registers if they are not
14809 general registers. */
14811 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14812 enum reg_class class2)
14814 /* In case we require secondary memory, compute cost of the store followed
14815 by load. In order to avoid bad register allocation choices, we need
14816 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14818 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Take the max of load/store cost for each side so the result never
   undercuts the symmetric memory cost.  */
14822 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14823 MEMORY_MOVE_COST (mode, class1, 1));
14824 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14825 MEMORY_MOVE_COST (mode, class2, 1));
14827 /* In case of copying from general_purpose_register we may emit multiple
14828 stores followed by single load causing memory size mismatch stall.
14829 Count this as arbitrarily high cost of 20. */
14830 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14833 /* In the case of FP/MMX moves, the registers actually overlap, and we
14834 have to switch modes in order to treat them differently. */
14835 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14836 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14842 /* Moves between SSE/MMX and integer unit are expensive. */
14843 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14844 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14845 return ix86_cost->mmxsse_to_integer;
/* Intra-unit moves: per-unit costs from the active cost table.  */
14846 if (MAYBE_FLOAT_CLASS_P (class1))
14847 return ix86_cost->fp_move;
14848 if (MAYBE_SSE_CLASS_P (class1))
14849 return ix86_cost->sse_move;
14850 if (MAYBE_MMX_CLASS_P (class1))
14851 return ix86_cost->mmx_move;
14855 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14857 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14859 /* Flags and only flags can only hold CCmode values. */
14860 if (CC_REGNO_P (regno))
14861 return GET_MODE_CLASS (mode) == MODE_CC;
14862 if (GET_MODE_CLASS (mode) == MODE_CC
14863 || GET_MODE_CLASS (mode) == MODE_RANDOM
14864 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* x87, SSE and MMX registers each accept only their unit's modes.  */
14866 if (FP_REGNO_P (regno))
14867 return VALID_FP_MODE_P (mode);
14868 if (SSE_REGNO_P (regno))
14869 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14870 if (MMX_REGNO_P (regno))
14872 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14873 /* We handle both integer and floats in the general purpose registers.
14874 In future we should be able to handle vector modes as well. */
14875 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14877 /* Take care for QImode values - they can be in non-QI regs, but then
14878 they do cause partial register stalls. */
14879 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Allow QImode in high regs only when spills are already committed or
   the target does not suffer partial-register stalls.  */
14881 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14884 /* Return the cost of moving data of mode M between a
14885 register and memory. A value of 2 is the default; this cost is
14886 relative to those in `REGISTER_MOVE_COST'.
14888 If moving between registers and memory is more expensive than
14889 between two registers, you should define this macro to express the
14892 Model also increased moving costs of QImode registers in non
14893 Q_REGS classes. */
14896 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
/* x87: index the fp load/store cost tables by mode size.  */
14898 if (FLOAT_CLASS_P (class))
14915 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE: likewise, indexed by vector/scalar size.  */
14917 if (SSE_CLASS_P (class))
14920 switch (GET_MODE_SIZE (mode))
14934 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14936 if (MMX_CLASS_P (class))
14939 switch (GET_MODE_SIZE (mode))
14950 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: per-size costs; QImode outside Q_REGS is charged
   extra (movzbl load, and an arbitrary +4 penalty on the store).  */
14952 switch (GET_MODE_SIZE (mode))
14956 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14957 : ix86_cost->movzbl_load);
14959 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14960 : ix86_cost->int_store[0] + 4);
14963 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14965 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14966 if (mode == TFmode)
14968 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14969 * (((int) GET_MODE_SIZE (mode)
14970 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14974 /* Compute a (partial) cost for rtx X. Return true if the complete
14975 cost has been computed, and false if subexpressions should be
14976 scanned. In either case, *TOTAL contains the cost result. */
14979 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14981 enum machine_mode mode = GET_MODE (x);
/* Constants: cheap unless they need widening on 64-bit or a PIC
   address load.  */
14989 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14991 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" negates the code value
   (0/1) before comparing against an enum, so this subcondition is
   effectively always true.  It was almost certainly meant to be
   "GET_CODE (x) != LABEL_REF" -- confirm against later GCC revisions
   before changing, as the elided context may compensate.  */
14993 else if (flag_pic && SYMBOLIC_CONST (x)
14995 || (!GET_CODE (x) != LABEL_REF
14996 && (GET_CODE (x) != SYMBOL_REF
14997 || !SYMBOL_REF_LOCAL_P (x)))))
/* FP constants: free if the x87 can materialize them (fldz/fld1...),
   otherwise priced as a constant-pool load below.  */
15004 if (mode == VOIDmode)
15007 switch (standard_80387_constant_p (x))
15012 default: /* Other constants */
15017 /* Start with (MEM (SYMBOL_REF)), since that's where
15018 it'll probably end up. Add a penalty for size. */
15019 *total = (COSTS_N_INSNS (1)
15020 + (flag_pic != 0 && !TARGET_64BIT)
15021 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15027 /* The zero extensions is often completely free on x86_64, so make
15028 it as cheap as possible. */
15029 if (TARGET_64BIT && mode == DImode
15030 && GET_MODE (XEXP (x, 0)) == SImode)
15032 else if (TARGET_ZERO_EXTEND_WITH_AND)
15033 *total = COSTS_N_INSNS (ix86_cost->add);
15035 *total = COSTS_N_INSNS (ix86_cost->movzx);
15039 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shifts: constant shifts by 1 are as cheap as an add; small left
   shifts may be done with LEA when that is cheaper.  */
15043 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15044 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15046 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15049 *total = COSTS_N_INSNS (ix86_cost->add);
15052 if ((value == 2 || value == 3)
15053 && !TARGET_DECOMPOSE_LEA
15054 && ix86_cost->lea <= ix86_cost->shift_const)
15056 *total = COSTS_N_INSNS (ix86_cost->lea);
/* DImode shifts on 32-bit are emulated with shift pairs (or worse
   for variable counts).  */
15066 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15068 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15070 if (INTVAL (XEXP (x, 1)) > 32)
15071 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15073 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15077 if (GET_CODE (XEXP (x, 1)) == AND)
15078 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15080 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15085 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15086 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15088 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* Multiply: cost scales with the number of set bits in a constant
   multiplier (shift-add synthesis); 7 is an arbitrary average.  */
15093 if (FLOAT_MODE_P (mode))
15094 *total = COSTS_N_INSNS (ix86_cost->fmul);
15095 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15097 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15100 for (nbits = 0; value != 0; value >>= 1)
15103 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15104 + nbits * ix86_cost->mult_bit);
15108 /* This is arbitrary */
15109 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15110 + 7 * ix86_cost->mult_bit);
15118 if (FLOAT_MODE_P (mode))
15119 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15121 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: recognize address-like forms that an LEA can compute in one
   instruction (base + index*scale + displacement).  */
15125 if (FLOAT_MODE_P (mode))
15126 *total = COSTS_N_INSNS (ix86_cost->fadd);
15127 else if (!TARGET_DECOMPOSE_LEA
15128 && GET_MODE_CLASS (mode) == MODE_INT
15129 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15131 if (GET_CODE (XEXP (x, 0)) == PLUS
15132 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15133 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15134 && CONSTANT_P (XEXP (x, 1)))
15136 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15137 if (val == 2 || val == 4 || val == 8)
15139 *total = COSTS_N_INSNS (ix86_cost->lea);
15140 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15141 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15143 *total += rtx_cost (XEXP (x, 1), outer_code);
15147 else if (GET_CODE (XEXP (x, 0)) == MULT
15148 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15150 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15151 if (val == 2 || val == 4 || val == 8)
15153 *total = COSTS_N_INSNS (ix86_cost->lea);
15154 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15155 *total += rtx_cost (XEXP (x, 1), outer_code);
15159 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15161 *total = COSTS_N_INSNS (ix86_cost->lea);
15162 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15163 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15164 *total += rtx_cost (XEXP (x, 1), outer_code);
15171 if (FLOAT_MODE_P (mode))
15173 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* 32-bit DImode logic ops cost two SImode ops; operands narrower than
   DImode are charged double via the shift trick below.  */
15181 if (!TARGET_64BIT && mode == DImode)
15183 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15184 + (rtx_cost (XEXP (x, 0), outer_code)
15185 << (GET_MODE (XEXP (x, 0)) != DImode))
15186 + (rtx_cost (XEXP (x, 1), outer_code)
15187 << (GET_MODE (XEXP (x, 1)) != DImode)));
15193 if (FLOAT_MODE_P (mode))
15195 *total = COSTS_N_INSNS (ix86_cost->fchs);
15201 if (!TARGET_64BIT && mode == DImode)
15202 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15204 *total = COSTS_N_INSNS (ix86_cost->add);
15208 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15213 if (FLOAT_MODE_P (mode))
15214 *total = COSTS_N_INSNS (ix86_cost->fabs);
15218 if (FLOAT_MODE_P (mode))
15219 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
/* Thread-pointer UNSPEC is a cheap segment-register read.  */
15223 if (XINT (x, 1) == UNSPEC_TP)
15232 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit a constructor reference for the init-section based
   DO_GLOBAL_CTORS_BODY mechanism: push SYMBOL's assembler name so the
   startup code can walk the collected addresses.  PRIORITY is unused.
   NOTE(review): return type and brace lines are elided in this view.  */
15234 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
/* Write "pushl $<name>" into the current (init) section.  */
15237 fputs ("\tpushl $", asm_out_file);
15238 assemble_name (asm_out_file, XSTR (symbol, 0));
15239 fputc ('\n', asm_out_file);
15245 static int current_machopic_label_num;
15247 /* Given a symbol name and its associated stub, write out the
15248 definition of the stub. */
/* Darwin (Mach-O) lazy symbol stub emission.  Write into FILE the stub
   named STUB for symbol SYMB: the stub jumps through a lazy pointer
   that initially points at a binder fragment, which in turn jumps to
   dyld_stub_binding_helper to resolve the real address on first use.
   NOTE(review): the PIC/non-PIC conditionals selecting between the two
   section calls and the two stub bodies are elided in this view.  */
15251 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15253 unsigned int length;
15254 char *binder_name, *symbol_name, lazy_ptr_name[32];
/* Each stub gets a fresh label number for its local LPC$N/L%d$lz labels.  */
15255 int label = ++current_machopic_label_num;
15257 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15258 symb = (*targetm.strip_name_encoding) (symb);
/* Build the binder fragment's name from the stub name.  */
15260 length = strlen (stub);
15261 binder_name = alloca (length + 32);
15262 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
/* Build the assembler-visible symbol name.  */
15264 length = strlen (symb);
15265 symbol_name = alloca (length + 32);
15266 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
/* Lazy-pointer label, e.g. "L7$lz".  */
15268 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Select the stub section (PIC vs. non-PIC variants; selector elided).  */
15271 machopic_picsymbol_stub_section ();
15273 machopic_symbol_stub_section ();
/* The stub itself.  */
15275 fprintf (file, "%s:\n", stub);
15276 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC body: materialize PC in %eax via call/pop, load the lazy pointer
   PC-relatively into %edx, and jump through it.  */
15280 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15281 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15282 fprintf (file, "\tjmp %%edx\n");
/* Non-PIC body: jump indirectly through the lazy pointer.  */
15285 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* The binder fragment: push the lazy pointer's address (PC-relative in
   the PIC case) and tail-jump to the dynamic linker's binding helper.  */
15287 fprintf (file, "%s:\n", binder_name);
15291 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15292 fprintf (file, "\tpushl %%eax\n");
15295 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15297 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* The lazy pointer itself, initialized to the binder fragment so the
   first call through the stub triggers binding.  */
15299 machopic_lazy_symbol_ptr_section ();
15300 fprintf (file, "%s:\n", lazy_ptr_name);
15301 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15302 fprintf (file, "\t.long %s\n", binder_name);
15304 #endif /* TARGET_MACHO */
15306 /* Order the registers for register allocator. */
/* Fill reg_alloc_order[] with the preferred allocation order:
   call-clobbered GPRs first, then call-saved GPRs, then whichever of
   x87/SSE matches the FP math mode, then MMX, then zero padding.
   NOTE(review): the declarations of `i' and `pos' are elided here.  */
15309 x86_order_regs_for_local_alloc (void)
15314 /* First allocate the local general purpose registers. */
15315 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15316 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15317 reg_alloc_order [pos++] = i;
15319 /* Global general purpose registers. */
15320 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15321 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15322 reg_alloc_order [pos++] = i;
15324 /* x87 registers come first in case we are doing FP math
15326 if (!TARGET_SSE_MATH)
15327 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15328 reg_alloc_order [pos++] = i;
15330 /* SSE registers. */
15331 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15332 reg_alloc_order [pos++] = i;
15333 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15334 reg_alloc_order [pos++] = i;
15336 /* x87 registers.  (Deferred until after SSE when SSE does FP math.)  */
15337 if (TARGET_SSE_MATH)
15338 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15339 reg_alloc_order [pos++] = i;
/* MMX registers last among the real classes.  */
15341 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15342 reg_alloc_order [pos++] = i;
15344 /* Initialize the rest of array as we do not allocate some registers
15346 while (pos < FIRST_PSEUDO_REGISTER)
15347 reg_alloc_order [pos++] = 0;
15350 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15351 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15354 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15355 struct attribute_spec.handler. */
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  Rejects the attribute (setting
   *NO_ADD_ATTRS and warning) when it is not applied to a struct/union
   type, or when the opposite attribute is already present.  */
15357 ix86_handle_struct_attribute (tree *node, tree name,
15358 tree args ATTRIBUTE_UNUSED,
15359 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Resolve NODE to the underlying type; a TYPE_DECL attaches the
   attribute to its type.  (Else-branch handling of plain types is
   elided in this view.)  */
15362 if (DECL_P (*node))
15364 if (TREE_CODE (*node) == TYPE_DECL)
15365 type = &TREE_TYPE (*node);
/* Only struct (RECORD_TYPE) and union types may carry the attribute.  */
15370 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15371 || TREE_CODE (*type) == UNION_TYPE)))
15373 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15374 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive; ignore the newcomer.  */
15377 else if ((is_attribute_p ("ms_struct", name)
15378 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15379 || ((is_attribute_p ("gcc_struct", name)
15380 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15382 warning ("`%s' incompatible attribute ignored",
15383 IDENTIFIER_POINTER (name));
15384 *no_add_attrs = true;
/* Return true if RECORD_TYPE should be laid out with MS bitfield rules:
   either the target defaults to MS layout and "gcc_struct" does not
   override it, or the type explicitly carries "ms_struct".  */
15391 ix86_ms_bitfield_layout_p (tree record_type)
15393 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15394 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15395 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15398 /* Returns an expression indicating where the this parameter is
15399 located on entry to the FUNCTION. */
/* Return an rtx (REG or MEM) saying where the `this' parameter of
   FUNCTION lives on entry.  64-bit: first or second integer argument
   register, depending on whether a hidden aggregate-return pointer
   occupies the first.  32-bit regparm/fastcall: a register; otherwise
   a stack slot past the return address (and past the hidden return
   pointer, if any).  NOTE(review): the TARGET_64BIT test and the
   regparm register-number computation are elided in this view.  */
15402 x86_this_parameter (tree function)
15404 tree type = TREE_TYPE (function);
/* 64-bit: skip one parameter register if the return value is passed
   by hidden reference.  */
15408 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15409 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15412 if (ix86_function_regparm (type, function) > 0)
15416 parm = TYPE_ARG_TYPES (type);
15417 /* Figure out whether or not the function has a variable number of
15419 for (; parm; parm = TREE_CHAIN (parm))
15420 if (TREE_VALUE (parm) == void_type_node)
15422 /* If not, the this parameter is in the first argument. */
/* fastcall also passes the first argument in a register
   (register-number selection elided in this view).  */
15426 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15428 return gen_rtx_REG (SImode, regno);
/* Stack case: offset 8 past a hidden aggregate-return pointer,
   otherwise offset 4 (just past the return address).  */
15432 if (aggregate_value_p (TREE_TYPE (type), type))
15433 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15435 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15438 /* Determine whether x86_output_mi_thunk can succeed. */
/* Determine whether x86_output_mi_thunk can succeed for this thunk:
   we need at least one scratch register free of incoming arguments on
   32-bit when VCALL_OFFSET or PIC GOT references require one.
   NOTE(review): the actual `return' statements are elided in this view.  */
15441 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15442 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15443 HOST_WIDE_INT vcall_offset, tree function)
15445 /* 64-bit can handle anything. */
15449 /* For 32-bit, everything's fine if we have one free register. */
15450 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15453 /* Need a free register for vcall_offset. */
15457 /* Need a free register for GOT references. */
15458 if (flag_pic && !(*targetm.binds_local_p) (function))
15461 /* Otherwise ok. */
15465 /* Output the assembler code for a thunk function. THUNK_DECL is the
15466 declaration for the thunk function itself, FUNCTION is the decl for
15467 the target function. DELTA is an immediate constant offset to be
15468 added to THIS. If VCALL_OFFSET is nonzero, the word at
15469 *(*this + vcall_offset) should be added to THIS. */
/* Output the assembler code for a thunk: adjust the incoming `this'
   by DELTA, optionally by *(*this + VCALL_OFFSET), then tail-jump to
   FUNCTION.  Uses output_asm_insn's {att|intel} dual-dialect templates
   throughout.  NOTE(review): many TARGET_64BIT/TARGET_MACHO branch
   lines and braces are elided in this view.  */
15472 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15473 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15474 HOST_WIDE_INT vcall_offset, tree function)
/* Where `this' arrives (register or stack slot).  */
15477 rtx this = x86_this_parameter (function);
15480 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15481 pull it in now and let DELTA benefit. */
15484 else if (vcall_offset)
15486 /* Put the this parameter into %eax. */
15488 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15489 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15492 this_reg = NULL_RTX;
15494 /* Adjust the this parameter by a fixed constant. */
15497 xops[0] = GEN_INT (delta);
15498 xops[1] = this_reg ? this_reg : this;
/* 64-bit: a delta too wide for an immediate must go via scratch R10.  */
15501 if (!x86_64_general_operand (xops[0], DImode))
15503 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15505 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15509 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15512 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15515 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register for the vtable pointer: R10 on 64-bit; on 32-bit
   ECX, or EAX when fastcall already claims ECX for arguments.  */
15519 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15522 int tmp_regno = 2 /* ECX */;
15523 if (lookup_attribute ("fastcall",
15524 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15525 tmp_regno = 0 /* EAX */;
15526 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer: tmp = *this.  */
15529 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15532 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15534 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15536 /* Adjust the this parameter. */
15537 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* 64-bit: a vcall_offset too big for a displacement goes via R11.  */
15538 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15540 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15541 xops[0] = GEN_INT (vcall_offset);
15543 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15544 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
/* this += *(vtable + vcall_offset).  */
15546 xops[1] = this_reg;
15548 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15550 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15553 /* If necessary, drop THIS back to its stack slot. */
15554 if (this_reg && this_reg != this)
15556 xops[0] = this_reg;
15558 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the target function.  */
15561 xops[0] = XEXP (DECL_RTL (function), 0);
/* 64-bit: direct jump when the symbol binds locally, else through the
   GOT with a GOTPCREL reference.  */
15564 if (!flag_pic || (*targetm.binds_local_p) (function))
15565 output_asm_insn ("jmp\t%P0", xops);
15568 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15569 tmp = gen_rtx_CONST (Pmode, tmp);
15570 tmp = gen_rtx_MEM (QImode, tmp);
15572 output_asm_insn ("jmp\t%A0", xops);
/* 32-bit: direct jump for local symbols ...  */
15577 if (!flag_pic || (*targetm.binds_local_p) (function))
15578 output_asm_insn ("jmp\t%P0", xops);
/* ... Darwin: jump through the Mach-O lazy symbol stub ...  */
15583 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15584 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15585 tmp = gen_rtx_MEM (QImode, tmp);
15587 output_asm_insn ("jmp\t%0", xops);
15590 #endif /* TARGET_MACHO */
/* ... ELF PIC: load the GOT base into ECX, then an indirect jump
   through the function's GOT slot.  */
15592 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15593 output_set_got (tmp);
15596 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15597 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit the standard file prologue plus any
   i386-specific directives (.version, __fltused, Intel syntax mode).  */
15603 x86_file_start (void)
15605 default_file_start ();
15606 if (X86_FILE_START_VERSION_DIRECTIVE)
15607 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
/* Some targets mark FP usage for their startup code/linker.  */
15608 if (X86_FILE_START_FLTUSED)
15609 fputs ("\t.global\t__fltused\n", asm_out_file);
/* Switch the assembler into Intel syntax when -masm=intel.  */
15610 if (ix86_asm_dialect == ASM_INTEL)
15611 fputs ("\t.intel_syntax\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: cap the alignment of FIELD at 32 bits for
   double/complex-double and integer-class modes, matching the classic
   ia32 ABI, unless 64-bit or -malign-double is in effect.  COMPUTED is
   the alignment the middle end derived.  NOTE(review): the early
   `return computed' for the 64-bit/align-double case is elided here.  */
15615 x86_field_alignment (tree field, int computed)
15617 enum machine_mode mode;
15618 tree type = TREE_TYPE (field);
15620 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type's mode decides the cap.  */
15622 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15623 ? get_inner_array_type (type) : type);
15624 if (mode == DFmode || mode == DCmode
15625 || GET_MODE_CLASS (mode) == MODE_INT
15626 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15627 return MIN (32, computed);
15631 /* Output assembler code to FILE to increment profiler label # LABELNO
15632 for profiling a function entry. */
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  Four variants: 64-bit PIC, 64-bit
   non-PIC, 32-bit PIC, 32-bit non-PIC; the selecting #if/else lines
   are elided in this view.  */
15634 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit PIC: RIP-relative counter address, mcount via the GOT.  */
15639 #ifndef NO_PROFILE_COUNTERS
15640 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15642 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
/* 64-bit non-PIC: absolute counter address, direct call.  */
15646 #ifndef NO_PROFILE_COUNTERS
15647 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15649 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: counter via @GOTOFF(%ebx), mcount via @GOT(%ebx).  */
15653 #ifndef NO_PROFILE_COUNTERS
15654 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15655 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15657 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute addresses throughout.  */
15661 #ifndef NO_PROFILE_COUNTERS
15662 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15663 PROFILE_COUNT_REGISTER);
15665 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15669 /* We don't have exact information about the insn sizes, but we may assume
15670 quite safely that we are informed about all 1 byte insns and memory
15671 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound on INSN's encoded size in bytes.
   We don't have exact insn sizes, but we know 1-byte insns and memory
   address sizes well enough to eliminate unnecessary padding.
   NOTE(review): the `return' statements for the individual cases are
   elided in this view.  */
15675 min_insn_size (rtx insn)
/* Non-insns and inactive insns occupy no bytes.  */
15679 if (!INSN_P (insn) || !active_insn_p (insn))
15682 /* Discard alignments we've emitted, and jump-table bodies.  */
15683 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15684 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15686 if (GET_CODE (insn) == JUMP_INSN
15687 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15688 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15691 /* Important case - calls are always 5 bytes.
15692 It is common to have many calls in the row. */
15693 if (GET_CODE (insn) == CALL_INSN
15694 && symbolic_reference_mentioned_p (PATTERN (insn))
15695 && !SIBLING_CALL_P (insn))
/* Trust 1-byte length attributes exactly.  */
15697 if (get_attr_length (insn) <= 1)
15700 /* For normal instructions we may rely on the sizes of addresses
15701 and the presence of symbol to require 4 bytes of encoding.
15702 This is not the case for jumps where references are PC relative. */
15703 if (GET_CODE (insn) != JUMP_INSN)
15705 l = get_attr_length_address (insn);
/* A symbolic reference forces at least a 4-byte displacement.  */
15706 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15715 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* AMD K8 mispredicts branches when more than 3 jumps land in the same
   16-byte window.  Slide a window [START, INSN] over the insn stream
   counting jumps/calls and estimated bytes, and emit alignment padding
   before a 4th jump that could share a 16-byte page with 3 others.
   NOTE(review): brace lines and the `isjump' declaration are elided.  */
15719 ix86_avoid_jump_misspredicts (void)
15721 rtx insn, start = get_insns ();
15722 int nbytes = 0, njumps = 0;
15725 /* Look for all minimal intervals of instructions containing 4 jumps.
15726 The intervals are bounded by START and INSN. NBYTES is the total
15727 size of instructions in the interval including INSN and not including
15728 START. When the NBYTES is smaller than 16 bytes, it is possible
15729 that the end of START and INSN ends up in the same 16byte page.
15731 The smallest offset in the page INSN can start is the case where START
15732 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15733 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15735 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
/* Account for INSN and trace it to the dump file.  */
15738 nbytes += min_insn_size (insn);
15740 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15741 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps (not jump tables) and calls.  */
15742 if ((GET_CODE (insn) == JUMP_INSN
15743 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15744 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15745 || GET_CODE (insn) == CALL_INSN)
/* Shrink the window from the front until at most 3 jumps remain,
   remembering whether the insn dropped was itself a jump.  */
15752 start = NEXT_INSN (start);
15753 if ((GET_CODE (start) == JUMP_INSN
15754 && GET_CODE (PATTERN (start)) != ADDR_VEC
15755 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15756 || GET_CODE (start) == CALL_INSN)
15757 njumps--, isjump = 1;
15760 nbytes -= min_insn_size (start);
15765 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15766 INSN_UID (start), INSN_UID (insn), nbytes);
/* 3 prior jumps within 16 bytes: pad so INSN starts a new window.  */
15768 if (njumps == 3 && isjump && nbytes < 16)
15770 int padsize = 15 - nbytes + min_insn_size (insn);
15773 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15774 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15779 /* AMD Athlon works faster
15780 when RET is not destination of conditional jump or directly preceded
15781 by other jump instruction. We avoid the penalty by inserting NOP just
15782 before the RET instructions in such cases. */
/* AMD Athlon works faster when RET is not the destination of a
   conditional jump and is not directly preceded by another jump.
   Walk every predecessor of the exit block and, when such a hot RET is
   found, replace it with the padded long-return pattern.
   NOTE(review): loop-variable declarations and the replacement of the
   original `ret' insn after the gen_return_internal_long emission are
   elided in this view.  */
15784 ix86_pad_returns (void)
15788 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15790 basic_block bb = e->src;
15791 rtx ret = BB_END (bb);
15793 bool replace = false;
/* Only plain RETURN jumps in blocks that are likely executed matter.  */
15795 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15796 || !maybe_hot_bb_p (bb))
/* Find the nearest active insn or label before the return.  */
15798 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15799 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* A label right before RET means some jump targets the return
   directly; replace if any non-fallthru edge reaches this block.  */
15801 if (prev && GET_CODE (prev) == CODE_LABEL)
15804 for (e = bb->pred; e; e = e->pred_next)
15805 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15806 && !(e->flags & EDGE_FALLTHRU))
/* A conditional jump or call immediately before RET also incurs the
   mispredict penalty.  */
15811 prev = prev_active_insn (ret);
15813 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15814 || GET_CODE (prev) == CALL_INSN))
15816 /* Empty functions get branch mispredict even when the jump destination
15817 is not visible to us. */
15818 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Swap in the long form of the return.  */
15823 emit_insn_before (gen_return_internal_long (), ret);
15829 /* Implement machine specific optimizations. We implement padding of returns
15830 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Body of the machine-dependent reorg pass (function header elided in
   this view): run the K8 return padding and the four-jumps-per-window
   avoidance, both only when optimizing and not for size.  */
15834 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15835 ix86_pad_returns ();
15836 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15837 ix86_avoid_jump_misspredicts ();
15840 /* Return nonzero when QImode register that must be represented via REX prefix
/* Return nonzero when INSN uses a QImode register whose byte form
   requires a REX prefix, i.e. any operand with hard regno >= 4
   (SPL/BPL/SIL/DIL and the R8B..R15B family).  NOTE(review): the
   declaration of `i' and the return statements are elided here.  */
15843 x86_extended_QIreg_mentioned_p (rtx insn)
/* Inspect the cached operand extraction for this insn.  */
15846 extract_insn_cached (insn);
15847 for (i = 0; i < recog_data.n_operands; i++)
15848 if (REG_P (recog_data.operand[i])
15849 && REGNO (recog_data.operand[i]) >= 4)
15854 /* Return nonzero when P points to register encoded via REX prefix.
15855 Called via for_each_rtx. */
/* for_each_rtx callback: return nonzero when *P is a hard register
   that needs a REX prefix (extended integer or SSE register).
   NOTE(review): the REG_P guard on *P is elided in this view.  */
15857 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15859 unsigned int regno;
15862 regno = REGNO (*p);
15863 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15866 /* Return true when INSN mentions register that must be encoded using REX
/* Return true when INSN's pattern mentions any register that must be
   encoded with a REX prefix; walks the pattern via for_each_rtx.  */
15869 x86_extended_reg_mentioned_p (rtx insn)
15871 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15874 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15875 optabs would emit if we didn't have TFmode patterns. */
/* Generate an unsigned SImode/DImode -> FP conversion, the same code
   optabs would emit if we had TFmode patterns: non-negative inputs use
   the signed FLOAT directly; negative (high-bit-set) inputs are halved
   with the low bit folded in, converted, then doubled.
   NOTE(review): the `abort' for bad modes and the assignment of `out'
   from operands[0] are elided in this view.  */
15878 x86_emit_floatuns (rtx operands[2])
15880 rtx neglab, donelab, i0, i1, f0, in, out;
15881 enum machine_mode mode, inmode;
15883 inmode = GET_MODE (operands[1]);
/* Only SImode and DImode sources are supported.  */
15884 if (inmode != SImode
15885 && inmode != DImode)
15889 in = force_reg (inmode, operands[1]);
15890 mode = GET_MODE (out);
15891 neglab = gen_label_rtx ();
15892 donelab = gen_label_rtx ();
15893 i1 = gen_reg_rtx (Pmode);
15894 f0 = gen_reg_rtx (mode);
/* Branch to the slow path when the value looks negative as signed.  */
15896 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
/* Fast path: a plain signed conversion is correct.  */
15898 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15899 emit_jump_insn (gen_jump (donelab));
15902 emit_label (neglab);
/* Slow path: convert (in >> 1) | (in & 1) — which rounds correctly —
   and double the result to recover the original magnitude.  */
15904 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15905 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15906 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15907 expand_float (f0, i0, 0);
15908 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15910 emit_label (donelab);
15913 /* Return if we do not know how to pass TYPE solely in registers. */
/* Return nonzero if we do not know how to pass TYPE solely in
   registers: defer to the generic test, and additionally force TImode
   aggregates onto the stack on 32-bit targets.  */
15915 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15917 if (default_must_pass_in_stack (mode, type))
15919 return (!TARGET_64BIT && type && mode == TImode);
15922 /* Initialize vector TARGET via VALS. */
/* Initialize vector TARGET from the rtvec VALS.  Strategy: all-constant
   vectors come from the constant pool; vectors whose only variable is
   element 0 load the rest from the pool and patch element 0 with a
   movss/movsd; everything else is built with unpack shuffles.
   NOTE(review): case labels, brace lines, the `i'/`is_constant'
   bookkeeping, and the default `abort' are elided in this view.  */
15924 ix86_expand_vector_init (rtx target, rtx vals)
15926 enum machine_mode mode = GET_MODE (target);
15927 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15928 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
/* Scan for any non-constant element (result flag elided).  */
15931 for (i = n_elts - 1; i >= 0; i--)
15932 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15933 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15936 /* Few special cases first...
15937 ... constants are best loaded from constant pool. */
15940 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15944 /* ... values where only first field is non-constant are best loaded
15945 from the pool and overwritten via move later. */
15948 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15949 GET_MODE_INNER (mode), 0);
15951 op = force_reg (mode, op);
/* Zero out element 0, load the constant remainder, then merge the
   variable element back in with a scalar move.  */
15952 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15953 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15954 switch (GET_MODE (target))
15957 emit_insn (gen_sse2_movsd (target, target, op));
15960 emit_insn (gen_sse_movss (target, target, op));
15968 /* And the busy sequence doing rotations. */
15969 switch (GET_MODE (target))
/* V2DF: widen both doubles to V2DF and interleave the low halves.  */
15974 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15976 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15978 vecop0 = force_reg (V2DFmode, vecop0);
15979 vecop1 = force_reg (V2DFmode, vecop1);
15980 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
/* V4SF: widen all four floats and combine with three unpcklps, giving
   element order {0,1,2,3} in TARGET.  */
15986 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15988 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15990 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15992 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15993 rtx tmp1 = gen_reg_rtx (V4SFmode);
15994 rtx tmp2 = gen_reg_rtx (V4SFmode);
15996 vecop0 = force_reg (V4SFmode, vecop0);
15997 vecop1 = force_reg (V4SFmode, vecop1);
15998 vecop2 = force_reg (V4SFmode, vecop2);
15999 vecop3 = force_reg (V4SFmode, vecop3);
16000 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16001 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16002 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16010 #include "gt-i386.h"