1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
837 rtx base, index, disp;
839 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
842 static int ix86_decompose_address (rtx, struct ix86_address *);
843 static int ix86_address_cost (rtx);
844 static bool ix86_cannot_force_const_mem (rtx);
845 static rtx ix86_delegitimize_address (rtx);
847 struct builtin_description;
848 static rtx ix86_expand_sse_comi (const struct builtin_description *,
850 static rtx ix86_expand_sse_compare (const struct builtin_description *,
852 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
853 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
854 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
855 static rtx ix86_expand_store_builtin (enum insn_code, tree);
856 static rtx safe_vector_operand (rtx, enum machine_mode);
857 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
858 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
859 enum rtx_code *, enum rtx_code *);
860 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
861 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
862 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
863 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
864 static int ix86_fp_comparison_cost (enum rtx_code code);
865 static unsigned int ix86_select_alt_pic_regnum (void);
866 static int ix86_save_reg (unsigned int, int);
867 static void ix86_compute_frame_layout (struct ix86_frame *);
868 static int ix86_comp_type_attributes (tree, tree);
869 static int ix86_function_regparm (tree, tree);
870 const struct attribute_spec ix86_attribute_table[];
871 static bool ix86_function_ok_for_sibcall (tree, tree);
872 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
873 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
874 static int ix86_value_regno (enum machine_mode);
875 static bool contains_128bit_aligned_vector_p (tree);
876 static bool ix86_ms_bitfield_layout_p (tree);
877 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
878 static int extended_reg_mentioned_1 (rtx *, void *);
879 static bool ix86_rtx_costs (rtx, int, int, int *);
880 static int min_insn_size (rtx);
881 static void k8_avoid_jump_misspredicts (void);
883 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
884 static void ix86_svr3_asm_out_constructor (rtx, int);
887 /* Register class used for passing given 64bit part of the argument.
888 These represent classes as documented by the PS ABI, with the exception
889 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
890 use SF or DFmode move instead of DImode to avoid reformatting penalties.
892 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
893 whenever possible (upper half does contain padding).
895 enum x86_64_reg_class
898 X86_64_INTEGER_CLASS,
899 X86_64_INTEGERSI_CLASS,
908 static const char * const x86_64_reg_class_name[] =
909 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
911 #define MAX_CLASSES 4
912 static int classify_argument (enum machine_mode, tree,
913 enum x86_64_reg_class [MAX_CLASSES], int);
914 static int examine_argument (enum machine_mode, tree, int, int *, int *);
915 static rtx construct_container (enum machine_mode, tree, int, int, int,
917 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
918 enum x86_64_reg_class);
920 /* Table of constants used by fldpi, fldln2, etc... */
921 static REAL_VALUE_TYPE ext_80387_constants_table [5];
922 static bool ext_80387_constants_init = 0;
923 static void init_ext_80387_constants (void);
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
928 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
929 # undef TARGET_MERGE_DECL_ATTRIBUTES
930 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_DELEGITIMIZE_ADDRESS
994 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
996 #undef TARGET_MS_BITFIELD_LAYOUT_P
997 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
999 #undef TARGET_ASM_OUTPUT_MI_THUNK
1000 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1004 #undef TARGET_ASM_FILE_START
1005 #define TARGET_ASM_FILE_START x86_file_start
1007 #undef TARGET_RTX_COSTS
1008 #define TARGET_RTX_COSTS ix86_rtx_costs
1009 #undef TARGET_ADDRESS_COST
1010 #define TARGET_ADDRESS_COST ix86_address_cost
1012 #undef TARGET_MACHINE_DEPENDENT_REORG
1013 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1015 struct gcc_target targetm = TARGET_INITIALIZER;
1017 /* The svr4 ABI for the i386 says that records and unions are returned
1019 #ifndef DEFAULT_PCC_STRUCT_RETURN
1020 #define DEFAULT_PCC_STRUCT_RETURN 1
1023 /* Sometimes certain combinations of command options do not make
1024 sense on a particular target machine. You can define a macro
1025 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1026 defined, is executed once just after all the command options have
1029 Don't use this macro to turn on various extra optimizations for
1030 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1033 override_options (void)
1036 /* Comes from final.c -- no real reason to change it. */
1037 #define MAX_CODE_ALIGN 16
1041 const struct processor_costs *cost; /* Processor costs */
1042 const int target_enable; /* Target flags to enable. */
1043 const int target_disable; /* Target flags to disable. */
1044 const int align_loop; /* Default alignments. */
1045 const int align_loop_max_skip;
1046 const int align_jump;
1047 const int align_jump_max_skip;
1048 const int align_func;
1050 const processor_target_table[PROCESSOR_max] =
1052 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1053 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1054 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1055 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1056 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1057 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1058 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1059 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1062 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1065 const char *const name; /* processor name or nickname. */
1066 const enum processor_type processor;
1067 const enum pta_flags
1072 PTA_PREFETCH_SSE = 8,
1078 const processor_alias_table[] =
1080 {"i386", PROCESSOR_I386, 0},
1081 {"i486", PROCESSOR_I486, 0},
1082 {"i586", PROCESSOR_PENTIUM, 0},
1083 {"pentium", PROCESSOR_PENTIUM, 0},
1084 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1085 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1086 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1087 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1088 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1089 {"i686", PROCESSOR_PENTIUMPRO, 0},
1090 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1091 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1092 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1093 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1094 PTA_MMX | PTA_PREFETCH_SSE},
1095 {"k6", PROCESSOR_K6, PTA_MMX},
1096 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1097 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1098 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1100 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1101 | PTA_3DNOW | PTA_3DNOW_A},
1102 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1103 | PTA_3DNOW_A | PTA_SSE},
1104 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A | PTA_SSE},
1106 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1109 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1112 int const pta_size = ARRAY_SIZE (processor_alias_table);
1114 /* By default our XFmode is the 80-bit extended format. If we have
1115 use TFmode instead, it's also the 80-bit format, but with padding. */
1116 REAL_MODE_FORMAT (XFmode) = &ieee_extended_intel_96_format;
1117 REAL_MODE_FORMAT (TFmode) = &ieee_extended_intel_128_format;
1119 /* Set the default values for switches whose default depends on TARGET_64BIT
1120 in case they weren't overwritten by command line options. */
1123 if (flag_omit_frame_pointer == 2)
1124 flag_omit_frame_pointer = 1;
1125 if (flag_asynchronous_unwind_tables == 2)
1126 flag_asynchronous_unwind_tables = 1;
1127 if (flag_pcc_struct_return == 2)
1128 flag_pcc_struct_return = 0;
1132 if (flag_omit_frame_pointer == 2)
1133 flag_omit_frame_pointer = 0;
1134 if (flag_asynchronous_unwind_tables == 2)
1135 flag_asynchronous_unwind_tables = 0;
1136 if (flag_pcc_struct_return == 2)
1137 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1140 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1141 SUBTARGET_OVERRIDE_OPTIONS;
1144 if (!ix86_tune_string && ix86_arch_string)
1145 ix86_tune_string = ix86_arch_string;
1146 if (!ix86_tune_string)
1147 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1148 if (!ix86_arch_string)
1149 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1151 if (ix86_cmodel_string != 0)
1153 if (!strcmp (ix86_cmodel_string, "small"))
1154 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1156 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1157 else if (!strcmp (ix86_cmodel_string, "32"))
1158 ix86_cmodel = CM_32;
1159 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1160 ix86_cmodel = CM_KERNEL;
1161 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1162 ix86_cmodel = CM_MEDIUM;
1163 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1164 ix86_cmodel = CM_LARGE;
1166 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1170 ix86_cmodel = CM_32;
1172 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1174 if (ix86_asm_string != 0)
1176 if (!strcmp (ix86_asm_string, "intel"))
1177 ix86_asm_dialect = ASM_INTEL;
1178 else if (!strcmp (ix86_asm_string, "att"))
1179 ix86_asm_dialect = ASM_ATT;
1181 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1183 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1184 error ("code model `%s' not supported in the %s bit mode",
1185 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1186 if (ix86_cmodel == CM_LARGE)
1187 sorry ("code model `large' not supported yet");
1188 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1189 sorry ("%i-bit mode not compiled in",
1190 (target_flags & MASK_64BIT) ? 64 : 32);
1192 for (i = 0; i < pta_size; i++)
1193 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1195 ix86_arch = processor_alias_table[i].processor;
1196 /* Default cpu tuning to the architecture. */
1197 ix86_tune = ix86_arch;
1198 if (processor_alias_table[i].flags & PTA_MMX
1199 && !(target_flags_explicit & MASK_MMX))
1200 target_flags |= MASK_MMX;
1201 if (processor_alias_table[i].flags & PTA_3DNOW
1202 && !(target_flags_explicit & MASK_3DNOW))
1203 target_flags |= MASK_3DNOW;
1204 if (processor_alias_table[i].flags & PTA_3DNOW_A
1205 && !(target_flags_explicit & MASK_3DNOW_A))
1206 target_flags |= MASK_3DNOW_A;
1207 if (processor_alias_table[i].flags & PTA_SSE
1208 && !(target_flags_explicit & MASK_SSE))
1209 target_flags |= MASK_SSE;
1210 if (processor_alias_table[i].flags & PTA_SSE2
1211 && !(target_flags_explicit & MASK_SSE2))
1212 target_flags |= MASK_SSE2;
1213 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1214 x86_prefetch_sse = true;
1215 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1216 error ("CPU you selected does not support x86-64 instruction set");
1221 error ("bad value (%s) for -march= switch", ix86_arch_string);
1223 for (i = 0; i < pta_size; i++)
1224 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1226 ix86_tune = processor_alias_table[i].processor;
1227 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1228 error ("CPU you selected does not support x86-64 instruction set");
1231 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1232 x86_prefetch_sse = true;
1234 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1237 ix86_cost = &size_cost;
1239 ix86_cost = processor_target_table[ix86_tune].cost;
1240 target_flags |= processor_target_table[ix86_tune].target_enable;
1241 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1243 /* Arrange to set up i386_stack_locals for all functions. */
1244 init_machine_status = ix86_init_machine_status;
1246 /* Validate -mregparm= value. */
1247 if (ix86_regparm_string)
1249 i = atoi (ix86_regparm_string);
1250 if (i < 0 || i > REGPARM_MAX)
1251 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1257 ix86_regparm = REGPARM_MAX;
1259 /* If the user has provided any of the -malign-* options,
1260 warn and use that value only if -falign-* is not set.
1261 Remove this code in GCC 3.2 or later. */
1262 if (ix86_align_loops_string)
1264 warning ("-malign-loops is obsolete, use -falign-loops");
1265 if (align_loops == 0)
1267 i = atoi (ix86_align_loops_string);
1268 if (i < 0 || i > MAX_CODE_ALIGN)
1269 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1271 align_loops = 1 << i;
1275 if (ix86_align_jumps_string)
1277 warning ("-malign-jumps is obsolete, use -falign-jumps");
1278 if (align_jumps == 0)
1280 i = atoi (ix86_align_jumps_string);
1281 if (i < 0 || i > MAX_CODE_ALIGN)
1282 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1284 align_jumps = 1 << i;
1288 if (ix86_align_funcs_string)
1290 warning ("-malign-functions is obsolete, use -falign-functions");
1291 if (align_functions == 0)
1293 i = atoi (ix86_align_funcs_string);
1294 if (i < 0 || i > MAX_CODE_ALIGN)
1295 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1297 align_functions = 1 << i;
1301 /* Default align_* from the processor table. */
1302 if (align_loops == 0)
1304 align_loops = processor_target_table[ix86_tune].align_loop;
1305 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1307 if (align_jumps == 0)
1309 align_jumps = processor_target_table[ix86_tune].align_jump;
1310 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1312 if (align_functions == 0)
1314 align_functions = processor_target_table[ix86_tune].align_func;
1317 /* Validate -mpreferred-stack-boundary= value, or provide default.
1318 The default of 128 bits is for Pentium III's SSE __m128, but we
1319 don't want additional code to keep the stack aligned when
1320 optimizing for code size. */
1321 ix86_preferred_stack_boundary = (optimize_size
1322 ? TARGET_64BIT ? 128 : 32
1324 if (ix86_preferred_stack_boundary_string)
1326 i = atoi (ix86_preferred_stack_boundary_string);
1327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1329 TARGET_64BIT ? 4 : 2);
1331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1334 /* Validate -mbranch-cost= value, or provide default. */
1335 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1336 if (ix86_branch_cost_string)
1338 i = atoi (ix86_branch_cost_string);
1340 error ("-mbranch-cost=%d is not between 0 and 5", i);
1342 ix86_branch_cost = i;
1345 if (ix86_tls_dialect_string)
1347 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1348 ix86_tls_dialect = TLS_DIALECT_GNU;
1349 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_SUN;
1352 error ("bad value (%s) for -mtls-dialect= switch",
1353 ix86_tls_dialect_string);
1356 /* Keep nonleaf frame pointers. */
1357 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1358 flag_omit_frame_pointer = 1;
1360 /* If we're doing fast math, we don't care about comparison order
1361 wrt NaNs. This lets us use a shorter comparison sequence. */
1362 if (flag_unsafe_math_optimizations)
1363 target_flags &= ~MASK_IEEE_FP;
1365 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1366 since the insns won't need emulation. */
1367 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1368 target_flags &= ~MASK_NO_FANCY_MATH_387;
1370 /* Turn on SSE2 builtins for -mpni. */
1372 target_flags |= MASK_SSE2;
1374 /* Turn on SSE builtins for -msse2. */
1376 target_flags |= MASK_SSE;
1380 if (TARGET_ALIGN_DOUBLE)
1381 error ("-malign-double makes no sense in the 64bit mode");
1383 error ("-mrtd calling convention not supported in the 64bit mode");
1384 /* Enable by default the SSE and MMX builtins. */
1385 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1386 ix86_fpmath = FPMATH_SSE;
1390 ix86_fpmath = FPMATH_387;
1391 /* i386 ABI does not specify red zone. It still makes sense to use it
1392 when programmer takes care to stack from being destroyed. */
1393 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1394 target_flags |= MASK_NO_RED_ZONE;
1397 if (ix86_fpmath_string != 0)
1399 if (! strcmp (ix86_fpmath_string, "387"))
1400 ix86_fpmath = FPMATH_387;
1401 else if (! strcmp (ix86_fpmath_string, "sse"))
1405 warning ("SSE instruction set disabled, using 387 arithmetics");
1406 ix86_fpmath = FPMATH_387;
1409 ix86_fpmath = FPMATH_SSE;
1411 else if (! strcmp (ix86_fpmath_string, "387,sse")
1412 || ! strcmp (ix86_fpmath_string, "sse,387"))
1416 warning ("SSE instruction set disabled, using 387 arithmetics");
1417 ix86_fpmath = FPMATH_387;
1419 else if (!TARGET_80387)
1421 warning ("387 instruction set disabled, using SSE arithmetics");
1422 ix86_fpmath = FPMATH_SSE;
1425 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1428 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1431 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1435 target_flags |= MASK_MMX;
1436 x86_prefetch_sse = true;
1439 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1442 target_flags |= MASK_MMX;
1443 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1444 extensions it adds. */
1445 if (x86_3dnow_a & (1 << ix86_arch))
1446 target_flags |= MASK_3DNOW_A;
1448 if ((x86_accumulate_outgoing_args & TUNEMASK)
1449 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1451 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1453 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1456 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1457 p = strchr (internal_label_prefix, 'X');
1458 internal_label_prefix_len = p - internal_label_prefix;
1464 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
1468 #ifdef INSN_SCHEDULING
1470 flag_schedule_insns = 0;
1473 /* The default values of these switches depend on the TARGET_64BIT
1474 that is not known at this moment. Mark these values with 2 and
1475 let user the to override these. In case there is no command line option
1476 specifying them, we will set the defaults in override_options. */
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
1483 /* Table of valid machine attributes. */
1484 const struct attribute_spec ix86_attribute_table[] =
1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1495 /* Regparm attribute specifies how many integer arguments are to be
1496 passed in registers. */
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1505 { NULL, 0, 0, false, false, false, NULL }
1508 /* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call. */
1513 ix86_function_ok_for_sibcall (tree decl, tree exp)
1515 /* If we are generating position-independent code, we cannot sibcall
1516 optimize any indirect call, or a direct call to a global function,
1517 as the PLT requires %ebx be live. */
1518 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1521 /* If we are returning floats on the 80387 register stack, we cannot
1522 make a sibcall from a function that doesn't return a float to a
1523 function that does or, conversely, from a function that does return
1524 a float to a function that doesn't; the necessary stack adjustment
1525 would not be executed. */
1526 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1527 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1530 /* If this call is indirect, we'll need to be able to use a call-clobbered
1531 register for the address of the target function. Make sure that all
1532 such registers are not used for passing parameters. */
1533 if (!decl && !TARGET_64BIT)
1537 /* We're looking at the CALL_EXPR, we need the type of the function. */
1538 type = TREE_OPERAND (exp, 0); /* pointer expression */
1539 type = TREE_TYPE (type); /* pointer type */
1540 type = TREE_TYPE (type); /* function type */
1542 if (ix86_function_regparm (type, NULL) >= 3)
1544 /* ??? Need to count the actual number of registers to be used,
1545 not the possible number of registers. Fix later. */
1550 /* Otherwise okay. That also includes certain types of indirect calls. */
1554 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1555 arguments as in struct attribute_spec.handler. */
1557 ix86_handle_cdecl_attribute (tree *node, tree name,
1558 tree args ATTRIBUTE_UNUSED,
1559 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1561 if (TREE_CODE (*node) != FUNCTION_TYPE
1562 && TREE_CODE (*node) != METHOD_TYPE
1563 && TREE_CODE (*node) != FIELD_DECL
1564 && TREE_CODE (*node) != TYPE_DECL)
1566 warning ("`%s' attribute only applies to functions",
1567 IDENTIFIER_POINTER (name));
1568 *no_add_attrs = true;
1572 if (is_attribute_p ("fastcall", name))
1574 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1576 error ("fastcall and stdcall attributes are not compatible");
1578 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1580 error ("fastcall and regparm attributes are not compatible");
1583 else if (is_attribute_p ("stdcall", name))
1585 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1587 error ("fastcall and stdcall attributes are not compatible");
1594 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1595 *no_add_attrs = true;
1601 /* Handle a "regparm" attribute;
1602 arguments as in struct attribute_spec.handler. */
1604 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1605 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1607 if (TREE_CODE (*node) != FUNCTION_TYPE
1608 && TREE_CODE (*node) != METHOD_TYPE
1609 && TREE_CODE (*node) != FIELD_DECL
1610 && TREE_CODE (*node) != TYPE_DECL)
1612 warning ("`%s' attribute only applies to functions",
1613 IDENTIFIER_POINTER (name));
1614 *no_add_attrs = true;
1620 cst = TREE_VALUE (args);
1621 if (TREE_CODE (cst) != INTEGER_CST)
1623 warning ("`%s' attribute requires an integer constant argument",
1624 IDENTIFIER_POINTER (name));
1625 *no_add_attrs = true;
1627 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1629 warning ("argument to `%s' attribute larger than %d",
1630 IDENTIFIER_POINTER (name), REGPARM_MAX);
1631 *no_add_attrs = true;
1634 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1636 error ("fastcall and regparm attributes are not compatible");
1643 /* Return 0 if the attributes for two types are incompatible, 1 if they
1644 are compatible, and 2 if they are nearly compatible (which causes a
1645 warning to be generated). */
1648 ix86_comp_type_attributes (tree type1, tree type2)
1650 /* Check for mismatch of non-default calling convention. */
1651 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1653 if (TREE_CODE (type1) != FUNCTION_TYPE)
1656 /* Check for mismatched fastcall types */
1657 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1658 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1661 /* Check for mismatched return types (cdecl vs stdcall). */
1662 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1668 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1669 DECL may be NULL when calling function indirectly
1670 or considerling a libcall. */
1673 ix86_function_regparm (tree type, tree decl)
1676 int regparm = ix86_regparm;
1677 bool user_convention = false;
1681 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1684 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1685 user_convention = true;
1688 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1691 user_convention = true;
1694 /* Use register calling convention for local functions when possible. */
1695 if (!TARGET_64BIT && !user_convention && decl
1696 && flag_unit_at_a_time)
1698 struct cgraph_local_info *i = cgraph_local_info (decl);
1701 /* We can't use regparm(3) for nested functions as these use
1702 static chain pointer in third argument. */
1703 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1713 /* Value is the number of bytes of arguments automatically
1714 popped when returning from a subroutine call.
1715 FUNDECL is the declaration node of the function (as a tree),
1716 FUNTYPE is the data type of the function (as a tree),
1717 or for a library call it is an identifier node for the subroutine name.
1718 SIZE is the number of bytes of arguments passed on the stack.
1720 On the 80386, the RTD insn may be used to pop them if the number
1721 of args is fixed, but if the number is variable then the caller
1722 must pop them all. RTD can't be used for library calls now
1723 because the library is compiled with the Unix compiler.
1724 Use of RTD is a selectable option, since it is incompatible with
1725 standard Unix calling sequences. If the option is not selected,
1726 the caller must always pop the args.
1728 The attribute stdcall is equivalent to RTD on a per module basis. */
1731 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1733 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1735 /* Cdecl functions override -mrtd, and never pop the stack. */
1736 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1738 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1739 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1740 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1744 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1745 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1746 == void_type_node)))
1750 /* Lose any fake structure return argument if it is passed on the stack. */
1751 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1754 int nregs = ix86_function_regparm (funtype, fundecl);
1757 return GET_MODE_SIZE (Pmode);
1763 /* Argument support functions. */
1765 /* Return true when register may be used to pass function parameters. */
1767 ix86_function_arg_regno_p (int regno)
1771 return (regno < REGPARM_MAX
1772 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1773 if (SSE_REGNO_P (regno) && TARGET_SSE)
1775 /* RAX is used as hidden argument to va_arg functions. */
1778 for (i = 0; i < REGPARM_MAX; i++)
1779 if (regno == x86_64_int_parameter_registers[i])
1784 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1785 for a call to a function whose data type is FNTYPE.
1786 For a library call, FNTYPE is 0. */
1789 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1790 tree fntype, /* tree ptr for function decl */
1791 rtx libname, /* SYMBOL_REF of library name or 0 */
1794 static CUMULATIVE_ARGS zero_cum;
1795 tree param, next_param;
1797 if (TARGET_DEBUG_ARG)
1799 fprintf (stderr, "\ninit_cumulative_args (");
1801 fprintf (stderr, "fntype code = %s, ret code = %s",
1802 tree_code_name[(int) TREE_CODE (fntype)],
1803 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1805 fprintf (stderr, "no fntype");
1808 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1813 /* Set up the number of registers to use for passing arguments. */
1815 cum->nregs = ix86_function_regparm (fntype, fndecl);
1817 cum->nregs = ix86_regparm;
1818 cum->sse_nregs = SSE_REGPARM_MAX;
1819 cum->maybe_vaarg = false;
1821 /* Use ecx and edx registers if function has fastcall attribute */
1822 if (fntype && !TARGET_64BIT)
1824 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1832 /* Determine if this function has variable arguments. This is
1833 indicated by the last argument being 'void_type_mode' if there
1834 are no variable arguments. If there are variable arguments, then
1835 we won't pass anything in registers */
1839 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1840 param != 0; param = next_param)
1842 next_param = TREE_CHAIN (param);
1843 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1850 cum->maybe_vaarg = true;
1854 if ((!fntype && !libname)
1855 || (fntype && !TYPE_ARG_TYPES (fntype)))
1856 cum->maybe_vaarg = 1;
1858 if (TARGET_DEBUG_ARG)
1859 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1864 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1865 of this code is to classify each 8bytes of incoming argument by the register
1866 class and assign registers accordingly. */
1868 /* Return the union class of CLASS1 and CLASS2.
1869 See the x86-64 PS ABI for details. */
1871 static enum x86_64_reg_class
1872 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1874 /* Rule #1: If both classes are equal, this is the resulting class. */
1875 if (class1 == class2)
1878 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1880 if (class1 == X86_64_NO_CLASS)
1882 if (class2 == X86_64_NO_CLASS)
1885 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1886 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1887 return X86_64_MEMORY_CLASS;
1889 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1890 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1891 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1892 return X86_64_INTEGERSI_CLASS;
1893 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1894 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1895 return X86_64_INTEGER_CLASS;
1897 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1898 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1899 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1900 return X86_64_MEMORY_CLASS;
1902 /* Rule #6: Otherwise class SSE is used. */
1903 return X86_64_SSE_CLASS;
1906 /* Classify the argument of type TYPE and mode MODE.
1907 CLASSES will be filled by the register class used to pass each word
1908 of the operand. The number of words is returned. In case the parameter
1909 should be passed in memory, 0 is returned. As a special case for zero
1910 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1912 BIT_OFFSET is used internally for handling records and specifies offset
1913 of the offset in bits modulo 256 to avoid overflow cases.
1915 See the x86-64 PS ABI for details.
1919 classify_argument (enum machine_mode mode, tree type,
1920 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1923 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1924 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1926 /* Variable sized entities are always passed/returned in memory. */
1930 if (mode != VOIDmode
1931 && MUST_PASS_IN_STACK (mode, type))
1934 if (type && AGGREGATE_TYPE_P (type))
1938 enum x86_64_reg_class subclasses[MAX_CLASSES];
1940 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1944 for (i = 0; i < words; i++)
1945 classes[i] = X86_64_NO_CLASS;
1947 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1948 signalize memory class, so handle it as special case. */
1951 classes[0] = X86_64_NO_CLASS;
1955 /* Classify each field of record and merge classes. */
1956 if (TREE_CODE (type) == RECORD_TYPE)
1958 /* For classes first merge in the field of the subclasses. */
1959 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1961 tree bases = TYPE_BINFO_BASETYPES (type);
1962 int n_bases = TREE_VEC_LENGTH (bases);
1965 for (i = 0; i < n_bases; ++i)
1967 tree binfo = TREE_VEC_ELT (bases, i);
1969 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1970 tree type = BINFO_TYPE (binfo);
1972 num = classify_argument (TYPE_MODE (type),
1974 (offset + bit_offset) % 256);
1977 for (i = 0; i < num; i++)
1979 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1981 merge_classes (subclasses[i], classes[i + pos]);
1985 /* And now merge the fields of structure. */
1986 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1988 if (TREE_CODE (field) == FIELD_DECL)
1992 /* Bitfields are always classified as integer. Handle them
1993 early, since later code would consider them to be
1994 misaligned integers. */
1995 if (DECL_BIT_FIELD (field))
1997 for (i = int_bit_position (field) / 8 / 8;
1998 i < (int_bit_position (field)
1999 + tree_low_cst (DECL_SIZE (field), 0)
2002 merge_classes (X86_64_INTEGER_CLASS,
2007 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2008 TREE_TYPE (field), subclasses,
2009 (int_bit_position (field)
2010 + bit_offset) % 256);
2013 for (i = 0; i < num; i++)
2016 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2018 merge_classes (subclasses[i], classes[i + pos]);
2024 /* Arrays are handled as small records. */
2025 else if (TREE_CODE (type) == ARRAY_TYPE)
2028 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2029 TREE_TYPE (type), subclasses, bit_offset);
2033 /* The partial classes are now full classes. */
2034 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2035 subclasses[0] = X86_64_SSE_CLASS;
2036 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2037 subclasses[0] = X86_64_INTEGER_CLASS;
2039 for (i = 0; i < words; i++)
2040 classes[i] = subclasses[i % num];
2042 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2043 else if (TREE_CODE (type) == UNION_TYPE
2044 || TREE_CODE (type) == QUAL_UNION_TYPE)
2046 /* For classes first merge in the field of the subclasses. */
2047 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2049 tree bases = TYPE_BINFO_BASETYPES (type);
2050 int n_bases = TREE_VEC_LENGTH (bases);
2053 for (i = 0; i < n_bases; ++i)
2055 tree binfo = TREE_VEC_ELT (bases, i);
2057 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2058 tree type = BINFO_TYPE (binfo);
2060 num = classify_argument (TYPE_MODE (type),
2062 (offset + (bit_offset % 64)) % 256);
2065 for (i = 0; i < num; i++)
2067 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2069 merge_classes (subclasses[i], classes[i + pos]);
2073 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2075 if (TREE_CODE (field) == FIELD_DECL)
2078 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2079 TREE_TYPE (field), subclasses,
2083 for (i = 0; i < num; i++)
2084 classes[i] = merge_classes (subclasses[i], classes[i]);
2091 /* Final merger cleanup. */
2092 for (i = 0; i < words; i++)
2094 /* If one class is MEMORY, everything should be passed in
2096 if (classes[i] == X86_64_MEMORY_CLASS)
2099 /* The X86_64_SSEUP_CLASS should be always preceded by
2100 X86_64_SSE_CLASS. */
2101 if (classes[i] == X86_64_SSEUP_CLASS
2102 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2103 classes[i] = X86_64_SSE_CLASS;
2105 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2106 if (classes[i] == X86_64_X87UP_CLASS
2107 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2108 classes[i] = X86_64_SSE_CLASS;
2113 /* Compute alignment needed. We align all types to natural boundaries with
2114 exception of XFmode that is aligned to 64bits. */
2115 if (mode != VOIDmode && mode != BLKmode)
2117 int mode_alignment = GET_MODE_BITSIZE (mode);
2120 mode_alignment = 128;
2121 else if (mode == XCmode)
2122 mode_alignment = 256;
2123 /* Misaligned fields are always returned in memory. */
2124 if (bit_offset % mode_alignment)
2128 /* Classification of atomic types. */
2138 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2139 classes[0] = X86_64_INTEGERSI_CLASS;
2141 classes[0] = X86_64_INTEGER_CLASS;
2145 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2148 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2149 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2152 if (!(bit_offset % 64))
2153 classes[0] = X86_64_SSESF_CLASS;
2155 classes[0] = X86_64_SSE_CLASS;
2158 classes[0] = X86_64_SSEDF_CLASS;
2161 classes[0] = X86_64_X87_CLASS;
2162 classes[1] = X86_64_X87UP_CLASS;
2165 classes[0] = X86_64_X87_CLASS;
2166 classes[1] = X86_64_X87UP_CLASS;
2167 classes[2] = X86_64_X87_CLASS;
2168 classes[3] = X86_64_X87UP_CLASS;
2171 classes[0] = X86_64_SSEDF_CLASS;
2172 classes[1] = X86_64_SSEDF_CLASS;
2175 classes[0] = X86_64_SSE_CLASS;
2183 classes[0] = X86_64_SSE_CLASS;
2184 classes[1] = X86_64_SSEUP_CLASS;
2199 /* Examine the argument and return set number of register required in each
2200 class. Return 0 iff parameter should be passed in memory. */
2202 examine_argument (enum machine_mode mode, tree type, int in_return,
2203 int *int_nregs, int *sse_nregs)
2205 enum x86_64_reg_class class[MAX_CLASSES];
2206 int n = classify_argument (mode, type, class, 0);
2212 for (n--; n >= 0; n--)
2215 case X86_64_INTEGER_CLASS:
2216 case X86_64_INTEGERSI_CLASS:
2219 case X86_64_SSE_CLASS:
2220 case X86_64_SSESF_CLASS:
2221 case X86_64_SSEDF_CLASS:
2224 case X86_64_NO_CLASS:
2225 case X86_64_SSEUP_CLASS:
2227 case X86_64_X87_CLASS:
2228 case X86_64_X87UP_CLASS:
2232 case X86_64_MEMORY_CLASS:
2237 /* Construct container for the argument used by GCC interface. See
2238 FUNCTION_ARG for the detailed description. */
2240 construct_container (enum machine_mode mode, tree type, int in_return,
2241 int nintregs, int nsseregs, const int * intreg,
2244 enum machine_mode tmpmode;
2246 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2247 enum x86_64_reg_class class[MAX_CLASSES];
2251 int needed_sseregs, needed_intregs;
2252 rtx exp[MAX_CLASSES];
2255 n = classify_argument (mode, type, class, 0);
2256 if (TARGET_DEBUG_ARG)
2259 fprintf (stderr, "Memory class\n");
2262 fprintf (stderr, "Classes:");
2263 for (i = 0; i < n; i++)
2265 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2267 fprintf (stderr, "\n");
2272 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2274 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2277 /* First construct simple cases. Avoid SCmode, since we want to use
2278 single register to pass this type. */
2279 if (n == 1 && mode != SCmode)
2282 case X86_64_INTEGER_CLASS:
2283 case X86_64_INTEGERSI_CLASS:
2284 return gen_rtx_REG (mode, intreg[0]);
2285 case X86_64_SSE_CLASS:
2286 case X86_64_SSESF_CLASS:
2287 case X86_64_SSEDF_CLASS:
2288 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2289 case X86_64_X87_CLASS:
2290 return gen_rtx_REG (mode, FIRST_STACK_REG);
2291 case X86_64_NO_CLASS:
2292 /* Zero sized array, struct or class. */
2297 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2298 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2300 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2301 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2302 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2303 && class[1] == X86_64_INTEGER_CLASS
2304 && (mode == CDImode || mode == TImode)
2305 && intreg[0] + 1 == intreg[1])
2306 return gen_rtx_REG (mode, intreg[0]);
2308 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2309 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2310 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2312 /* Otherwise figure out the entries of the PARALLEL. */
2313 for (i = 0; i < n; i++)
2317 case X86_64_NO_CLASS:
2319 case X86_64_INTEGER_CLASS:
2320 case X86_64_INTEGERSI_CLASS:
2321 /* Merge TImodes on aligned occasions here too. */
2322 if (i * 8 + 8 > bytes)
2323 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2324 else if (class[i] == X86_64_INTEGERSI_CLASS)
2328 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2329 if (tmpmode == BLKmode)
2331 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2332 gen_rtx_REG (tmpmode, *intreg),
2336 case X86_64_SSESF_CLASS:
2337 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2338 gen_rtx_REG (SFmode,
2339 SSE_REGNO (sse_regno)),
2343 case X86_64_SSEDF_CLASS:
2344 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2345 gen_rtx_REG (DFmode,
2346 SSE_REGNO (sse_regno)),
2350 case X86_64_SSE_CLASS:
2351 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2355 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2356 gen_rtx_REG (tmpmode,
2357 SSE_REGNO (sse_regno)),
2359 if (tmpmode == TImode)
2367 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2368 for (i = 0; i < nexps; i++)
2369 XVECEXP (ret, 0, i) = exp [i];
2373 /* Update the data in CUM to advance over an argument
2374 of mode MODE and data type TYPE.
2375 (TYPE is null for libcalls where that information may not be available.) */
2378 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2379 enum machine_mode mode, /* current arg mode */
2380 tree type, /* type of the argument or 0 if lib support */
2381 int named) /* whether or not the argument was named */
2384 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2385 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2387 if (TARGET_DEBUG_ARG)
2389 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2390 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2393 int int_nregs, sse_nregs;
2394 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2395 cum->words += words;
2396 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2398 cum->nregs -= int_nregs;
2399 cum->sse_nregs -= sse_nregs;
2400 cum->regno += int_nregs;
2401 cum->sse_regno += sse_nregs;
2404 cum->words += words;
2408 if (TARGET_SSE && mode == TImode)
2410 cum->sse_words += words;
2411 cum->sse_nregs -= 1;
2412 cum->sse_regno += 1;
2413 if (cum->sse_nregs <= 0)
2421 cum->words += words;
2422 cum->nregs -= words;
2423 cum->regno += words;
2425 if (cum->nregs <= 0)
2435 /* Define where to put the arguments to a function.
2436 Value is zero to push the argument on the stack,
2437 or a hard register in which to store the argument.
2439 MODE is the argument's machine mode.
2440 TYPE is the data type of the argument (as a tree).
2441 This is null for libcalls where that information may
2443 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2444 the preceding args and about the function being called.
2445 NAMED is nonzero if this argument is a named parameter
2446 (otherwise it is an extra parameter matching an ellipsis). */
2449 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2450 enum machine_mode mode, /* current arg mode */
2451 tree type, /* type of the argument or 0 if lib support */
2452 int named) /* != 0 for normal args, == 0 for ... args */
2456 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2457 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2459 /* Handle a hidden AL argument containing number of registers for varargs
2460 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2462 if (mode == VOIDmode)
2465 return GEN_INT (cum->maybe_vaarg
2466 ? (cum->sse_nregs < 0
2474 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2475 &x86_64_int_parameter_registers [cum->regno],
2480 /* For now, pass fp/complex values on the stack. */
2492 if (words <= cum->nregs)
2494 int regno = cum->regno;
2496 /* Fastcall allocates the first two DWORD (SImode) or
2497 smaller arguments to ECX and EDX. */
2500 if (mode == BLKmode || mode == DImode)
2503 /* ECX not EAX is the first allocated register. */
2507 ret = gen_rtx_REG (mode, regno);
2512 ret = gen_rtx_REG (mode, cum->sse_regno);
2516 if (TARGET_DEBUG_ARG)
2519 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2520 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2523 print_simple_rtl (stderr, ret);
2525 fprintf (stderr, ", stack");
2527 fprintf (stderr, " )\n");
2533 /* A C expression that indicates when an argument must be passed by
2534 reference. If nonzero for an argument, a copy of that argument is
2535 made in memory and a pointer to the argument is passed instead of
2536 the argument itself. The pointer is passed in whatever way is
2537 appropriate for passing a pointer to that type. */
2540 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2541 enum machine_mode mode ATTRIBUTE_UNUSED,
2542 tree type, int named ATTRIBUTE_UNUSED)
2547 if (type && int_size_in_bytes (type) == -1)
2549 if (TARGET_DEBUG_ARG)
2550 fprintf (stderr, "function_arg_pass_by_reference\n");
2557 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2560 contains_128bit_aligned_vector_p (tree type)
2562 enum machine_mode mode = TYPE_MODE (type);
2563 if (SSE_REG_MODE_P (mode)
2564 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2566 if (TYPE_ALIGN (type) < 128)
2569 if (AGGREGATE_TYPE_P (type))
2571 /* Walk the aggregates recursively. */
2572 if (TREE_CODE (type) == RECORD_TYPE
2573 || TREE_CODE (type) == UNION_TYPE
2574 || TREE_CODE (type) == QUAL_UNION_TYPE)
2578 if (TYPE_BINFO (type) != NULL
2579 && TYPE_BINFO_BASETYPES (type) != NULL)
2581 tree bases = TYPE_BINFO_BASETYPES (type);
2582 int n_bases = TREE_VEC_LENGTH (bases);
2585 for (i = 0; i < n_bases; ++i)
2587 tree binfo = TREE_VEC_ELT (bases, i);
2588 tree type = BINFO_TYPE (binfo);
2590 if (contains_128bit_aligned_vector_p (type))
2594 /* And now merge the fields of structure. */
2595 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2597 if (TREE_CODE (field) == FIELD_DECL
2598 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2602 /* Just for use if some languages passes arrays by value. */
2603 else if (TREE_CODE (type) == ARRAY_TYPE)
2605 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2614 /* Gives the alignment boundary, in bits, of an argument with the
2615 specified mode and type. */
2618 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2622 align = TYPE_ALIGN (type);
2624 align = GET_MODE_ALIGNMENT (mode);
2625 if (align < PARM_BOUNDARY)
2626 align = PARM_BOUNDARY;
2629 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2630 make an exception for SSE modes since these require 128bit
2633 The handling here differs from field_alignment. ICC aligns MMX
2634 arguments to 4 byte boundaries, while structure fields are aligned
2635 to 8 byte boundaries. */
2638 if (!SSE_REG_MODE_P (mode))
2639 align = PARM_BOUNDARY;
2643 if (!contains_128bit_aligned_vector_p (type))
2644 align = PARM_BOUNDARY;
2652 /* Return true if N is a possible register number of function value. */
2654 ix86_function_value_regno_p (int regno)
2658 return ((regno) == 0
2659 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2660 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2662 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2663 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2664 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2667 /* Define how to find the value returned by a function.
2668 VALTYPE is the data type of the value (as a tree).
2669 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2670 otherwise, FUNC is 0. */
2672 ix86_function_value (tree valtype)
2676 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2677 REGPARM_MAX, SSE_REGPARM_MAX,
2678 x86_64_int_return_registers, 0);
2679 /* For zero sized structures, construct_container return NULL, but we need
2680 to keep rest of compiler happy by returning meaningful value. */
2682 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2686 return gen_rtx_REG (TYPE_MODE (valtype),
2687 ix86_value_regno (TYPE_MODE (valtype)));
2690 /* Return false iff type is returned in memory. */
2692 ix86_return_in_memory (tree type)
2694 int needed_intregs, needed_sseregs, size;
2695 enum machine_mode mode = TYPE_MODE (type);
2698 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2700 if (mode == BLKmode)
2703 size = int_size_in_bytes (type);
2705 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2708 if (VECTOR_MODE_P (mode) || mode == TImode)
2710 /* User-created vectors small enough to fit in EAX. */
2714 /* MMX/3dNow values are returned on the stack, since we've
2715 got to EMMS/FEMMS before returning. */
2719 /* SSE values are returned in XMM0. */
2720 /* ??? Except when it doesn't exist? We have a choice of
2721 either (1) being abi incompatible with a -march switch,
2722 or (2) generating an error here. Given no good solution,
2723 I think the safest thing is one warning. The user won't
2724 be able to use -Werror, but... */
2735 warning ("SSE vector return without SSE enabled "
2749 /* Define how to find the value returned by a library function
2750 assuming the value has mode MODE. */
2752 ix86_libcall_value (enum machine_mode mode)
2762 return gen_rtx_REG (mode, FIRST_SSE_REG);
2765 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2767 return gen_rtx_REG (mode, 0);
2771 return gen_rtx_REG (mode, ix86_value_regno (mode));
2774 /* Given a mode, return the register to use for a return value. */
2777 ix86_value_regno (enum machine_mode mode)
2779 /* Floating point return values in %st(0). */
2780 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2781 return FIRST_FLOAT_REG;
2782 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2783 we prevent this case when sse is not available. */
2784 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2785 return FIRST_SSE_REG;
2786 /* Everything else in %eax. */
2790 /* Create the va_list data type. */
2793 ix86_build_va_list (void)
2795 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2797 /* For i386 we use plain pointer to argument area. */
2799 return build_pointer_type (char_type_node);
2801 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2802 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2804 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2805 unsigned_type_node);
2806 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2807 unsigned_type_node);
2808 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2810 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2813 DECL_FIELD_CONTEXT (f_gpr) = record;
2814 DECL_FIELD_CONTEXT (f_fpr) = record;
2815 DECL_FIELD_CONTEXT (f_ovf) = record;
2816 DECL_FIELD_CONTEXT (f_sav) = record;
2818 TREE_CHAIN (record) = type_decl;
2819 TYPE_NAME (record) = type_decl;
2820 TYPE_FIELDS (record) = f_gpr;
2821 TREE_CHAIN (f_gpr) = f_fpr;
2822 TREE_CHAIN (f_fpr) = f_ovf;
2823 TREE_CHAIN (f_ovf) = f_sav;
2825 layout_type (record);
2827 /* The correct type is an array type of one element. */
2828 return build_array_type (record, build_index_type (size_zero_node));
2831 /* Perform any needed actions needed for a function that is receiving a
2832 variable number of arguments.
2836 MODE and TYPE are the mode and type of the current parameter.
2838 PRETEND_SIZE is a variable that should be set to the amount of stack
2839 that must be pushed by the prolog to pretend that our caller pushed
2842 Normally, this macro will push all remaining incoming registers on the
2843 stack and set PRETEND_SIZE to the length of the registers pushed. */
2846 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2847 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2850 CUMULATIVE_ARGS next_cum;
2851 rtx save_area = NULL_RTX, mem;
2864 /* Indicate to allocate space on the stack for varargs save area. */
2865 ix86_save_varrargs_registers = 1;
2867 cfun->stack_alignment_needed = 128;
2869 fntype = TREE_TYPE (current_function_decl);
2870 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2871 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2872 != void_type_node));
2874 /* For varargs, we do not want to skip the dummy va_dcl argument.
2875 For stdargs, we do want to skip the last named argument. */
2878 function_arg_advance (&next_cum, mode, type, 1);
2881 save_area = frame_pointer_rtx;
2883 set = get_varargs_alias_set ();
2885 for (i = next_cum.regno; i < ix86_regparm; i++)
2887 mem = gen_rtx_MEM (Pmode,
2888 plus_constant (save_area, i * UNITS_PER_WORD));
2889 set_mem_alias_set (mem, set);
2890 emit_move_insn (mem, gen_rtx_REG (Pmode,
2891 x86_64_int_parameter_registers[i]));
2894 if (next_cum.sse_nregs)
2896 /* Now emit code to save SSE registers. The AX parameter contains number
2897 of SSE parameter registers used to call this function. We use
2898 sse_prologue_save insn template that produces computed jump across
2899 SSE saves. We need some preparation work to get this working. */
2901 label = gen_label_rtx ();
2902 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2904 /* Compute address to jump to :
2905 label - 5*eax + nnamed_sse_arguments*5 */
2906 tmp_reg = gen_reg_rtx (Pmode);
2907 nsse_reg = gen_reg_rtx (Pmode);
2908 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2909 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2910 gen_rtx_MULT (Pmode, nsse_reg,
2912 if (next_cum.sse_regno)
2915 gen_rtx_CONST (DImode,
2916 gen_rtx_PLUS (DImode,
2918 GEN_INT (next_cum.sse_regno * 4))));
2920 emit_move_insn (nsse_reg, label_ref);
2921 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2923 /* Compute address of memory block we save into. We always use pointer
2924 pointing 127 bytes after first byte to store - this is needed to keep
2925 instruction size limited by 4 bytes. */
2926 tmp_reg = gen_reg_rtx (Pmode);
2927 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2928 plus_constant (save_area,
2929 8 * REGPARM_MAX + 127)));
2930 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2931 set_mem_alias_set (mem, set);
2932 set_mem_align (mem, BITS_PER_WORD);
2934 /* And finally do the dirty job! */
2935 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2936 GEN_INT (next_cum.sse_regno), label));
2941 /* Implement va_start. */
2944 ix86_va_start (tree valist, rtx nextarg)
2946 HOST_WIDE_INT words, n_gpr, n_fpr;
2947 tree f_gpr, f_fpr, f_ovf, f_sav;
2948 tree gpr, fpr, ovf, sav, t;
2950 /* Only 64bit target needs something special. */
2953 std_expand_builtin_va_start (valist, nextarg);
2957 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2958 f_fpr = TREE_CHAIN (f_gpr);
2959 f_ovf = TREE_CHAIN (f_fpr);
2960 f_sav = TREE_CHAIN (f_ovf);
2962 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2963 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2964 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2965 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2966 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2968 /* Count number of gp and fp argument registers used. */
2969 words = current_function_args_info.words;
2970 n_gpr = current_function_args_info.regno;
2971 n_fpr = current_function_args_info.sse_regno;
2973 if (TARGET_DEBUG_ARG)
2974 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2975 (int) words, (int) n_gpr, (int) n_fpr);
2977 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2978 build_int_2 (n_gpr * 8, 0));
2979 TREE_SIDE_EFFECTS (t) = 1;
2980 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2982 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2983 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2984 TREE_SIDE_EFFECTS (t) = 1;
2985 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2987 /* Find the overflow area. */
2988 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2990 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2991 build_int_2 (words * UNITS_PER_WORD, 0));
2992 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2993 TREE_SIDE_EFFECTS (t) = 1;
2994 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2996 /* Find the register save area.
2997 Prologue of the function save it right above stack frame. */
2998 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2999 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3000 TREE_SIDE_EFFECTS (t) = 1;
3001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3004 /* Implement va_arg. */
3006 ix86_va_arg (tree valist, tree type)
3008 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3009 tree f_gpr, f_fpr, f_ovf, f_sav;
3010 tree gpr, fpr, ovf, sav, t;
3012 rtx lab_false, lab_over = NULL_RTX;
3017 /* Only 64bit target needs something special. */
3020 return std_expand_builtin_va_arg (valist, type);
3023 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3024 f_fpr = TREE_CHAIN (f_gpr);
3025 f_ovf = TREE_CHAIN (f_fpr);
3026 f_sav = TREE_CHAIN (f_ovf);
3028 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3029 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3030 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3031 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3032 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3034 size = int_size_in_bytes (type);
3037 /* Passed by reference. */
3039 type = build_pointer_type (type);
3040 size = int_size_in_bytes (type);
3042 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3044 container = construct_container (TYPE_MODE (type), type, 0,
3045 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3047 * Pull the value out of the saved registers ...
3050 addr_rtx = gen_reg_rtx (Pmode);
3054 rtx int_addr_rtx, sse_addr_rtx;
3055 int needed_intregs, needed_sseregs;
3058 lab_over = gen_label_rtx ();
3059 lab_false = gen_label_rtx ();
3061 examine_argument (TYPE_MODE (type), type, 0,
3062 &needed_intregs, &needed_sseregs);
3065 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3066 || TYPE_ALIGN (type) > 128);
3068 /* In case we are passing structure, verify that it is consecutive block
3069 on the register save area. If not we need to do moves. */
3070 if (!need_temp && !REG_P (container))
3072 /* Verify that all registers are strictly consecutive */
3073 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3077 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3079 rtx slot = XVECEXP (container, 0, i);
3080 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3081 || INTVAL (XEXP (slot, 1)) != i * 16)
3089 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3091 rtx slot = XVECEXP (container, 0, i);
3092 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3093 || INTVAL (XEXP (slot, 1)) != i * 8)
3100 int_addr_rtx = addr_rtx;
3101 sse_addr_rtx = addr_rtx;
3105 int_addr_rtx = gen_reg_rtx (Pmode);
3106 sse_addr_rtx = gen_reg_rtx (Pmode);
3108 /* First ensure that we fit completely in registers. */
3111 emit_cmp_and_jump_insns (expand_expr
3112 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3113 GEN_INT ((REGPARM_MAX - needed_intregs +
3114 1) * 8), GE, const1_rtx, SImode,
3119 emit_cmp_and_jump_insns (expand_expr
3120 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3121 GEN_INT ((SSE_REGPARM_MAX -
3122 needed_sseregs + 1) * 16 +
3123 REGPARM_MAX * 8), GE, const1_rtx,
3124 SImode, 1, lab_false);
3127 /* Compute index to start of area used for integer regs. */
3130 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3131 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3132 if (r != int_addr_rtx)
3133 emit_move_insn (int_addr_rtx, r);
3137 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3138 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3139 if (r != sse_addr_rtx)
3140 emit_move_insn (sse_addr_rtx, r);
3148 /* Never use the memory itself, as it has the alias set. */
3149 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3150 mem = gen_rtx_MEM (BLKmode, x);
3151 force_operand (x, addr_rtx);
3152 set_mem_alias_set (mem, get_varargs_alias_set ());
3153 set_mem_align (mem, BITS_PER_UNIT);
3155 for (i = 0; i < XVECLEN (container, 0); i++)
3157 rtx slot = XVECEXP (container, 0, i);
3158 rtx reg = XEXP (slot, 0);
3159 enum machine_mode mode = GET_MODE (reg);
3165 if (SSE_REGNO_P (REGNO (reg)))
3167 src_addr = sse_addr_rtx;
3168 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3172 src_addr = int_addr_rtx;
3173 src_offset = REGNO (reg) * 8;
3175 src_mem = gen_rtx_MEM (mode, src_addr);
3176 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3177 src_mem = adjust_address (src_mem, mode, src_offset);
3178 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3179 emit_move_insn (dest_mem, src_mem);
3186 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3187 build_int_2 (needed_intregs * 8, 0));
3188 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3189 TREE_SIDE_EFFECTS (t) = 1;
3190 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3195 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3196 build_int_2 (needed_sseregs * 16, 0));
3197 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3198 TREE_SIDE_EFFECTS (t) = 1;
3199 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3202 emit_jump_insn (gen_jump (lab_over));
3204 emit_label (lab_false);
3207 /* ... otherwise out of the overflow area. */
3209 /* Care for on-stack alignment if needed. */
3210 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3214 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3215 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3216 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3220 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3222 emit_move_insn (addr_rtx, r);
3225 build (PLUS_EXPR, TREE_TYPE (t), t,
3226 build_int_2 (rsize * UNITS_PER_WORD, 0));
3227 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3228 TREE_SIDE_EFFECTS (t) = 1;
3229 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3232 emit_label (lab_over);
3236 r = gen_rtx_MEM (Pmode, addr_rtx);
3237 set_mem_alias_set (r, get_varargs_alias_set ());
3238 emit_move_insn (addr_rtx, r);
3244 /* Return nonzero if OP is either a i387 or SSE fp register. */
3246 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3248 return ANY_FP_REG_P (op);
3251 /* Return nonzero if OP is an i387 fp register. */
3253 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3255 return FP_REG_P (op);
3258 /* Return nonzero if OP is a non-fp register_operand. */
3260 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3262 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3265 /* Return nonzero if OP is a register operand other than an
3266 i387 fp register. */
3268 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3270 return register_operand (op, mode) && !FP_REG_P (op);
3273 /* Return nonzero if OP is general operand representable on x86_64. */
3276 x86_64_general_operand (rtx op, enum machine_mode mode)
3279 return general_operand (op, mode);
3280 if (nonimmediate_operand (op, mode))
3282 return x86_64_sign_extended_value (op);
3285 /* Return nonzero if OP is general operand representable on x86_64
3286 as either sign extended or zero extended constant. */
3289 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3292 return general_operand (op, mode);
3293 if (nonimmediate_operand (op, mode))
3295 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3298 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3301 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3304 return nonmemory_operand (op, mode);
3305 if (register_operand (op, mode))
3307 return x86_64_sign_extended_value (op);
3310 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3313 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3315 if (!TARGET_64BIT || !flag_pic)
3316 return nonmemory_operand (op, mode);
3317 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3319 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3324 /* Return nonzero if OPNUM's MEM should be matched
3325 in movabs* patterns. */
3328 ix86_check_movabs (rtx insn, int opnum)
3332 set = PATTERN (insn);
3333 if (GET_CODE (set) == PARALLEL)
3334 set = XVECEXP (set, 0, 0);
3335 if (GET_CODE (set) != SET)
3337 mem = XEXP (set, opnum);
3338 while (GET_CODE (mem) == SUBREG)
3339 mem = SUBREG_REG (mem);
3340 if (GET_CODE (mem) != MEM)
3342 return (volatile_ok || !MEM_VOLATILE_P (mem));
3345 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3348 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3351 return nonmemory_operand (op, mode);
3352 if (register_operand (op, mode))
3354 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3357 /* Return nonzero if OP is immediate operand representable on x86_64. */
3360 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3363 return immediate_operand (op, mode);
3364 return x86_64_sign_extended_value (op);
3367 /* Return nonzero if OP is immediate operand representable on x86_64. */
3370 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3372 return x86_64_zero_extended_value (op);
3375 /* Return nonzero if OP is (const_int 1), else return zero. */
3378 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3380 return op == const1_rtx;
3383 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3384 for shift & compare patterns, as shifting by 0 does not change flags),
3385 else return zero. */
3388 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3390 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3393 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3394 reference and a constant. */
3397 symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3399 switch (GET_CODE (op))
3407 if (GET_CODE (op) == SYMBOL_REF
3408 || GET_CODE (op) == LABEL_REF
3409 || (GET_CODE (op) == UNSPEC
3410 && (XINT (op, 1) == UNSPEC_GOT
3411 || XINT (op, 1) == UNSPEC_GOTOFF
3412 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3414 if (GET_CODE (op) != PLUS
3415 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3419 if (GET_CODE (op) == SYMBOL_REF
3420 || GET_CODE (op) == LABEL_REF)
3422 /* Only @GOTOFF gets offsets. */
3423 if (GET_CODE (op) != UNSPEC
3424 || XINT (op, 1) != UNSPEC_GOTOFF)
3427 op = XVECEXP (op, 0, 0);
3428 if (GET_CODE (op) == SYMBOL_REF
3429 || GET_CODE (op) == LABEL_REF)
3438 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3441 pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3443 if (GET_CODE (op) != CONST)
3448 if (GET_CODE (op) == UNSPEC
3449 && XINT (op, 1) == UNSPEC_GOTPCREL)
3451 if (GET_CODE (op) == PLUS
3452 && GET_CODE (XEXP (op, 0)) == UNSPEC
3453 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3458 if (GET_CODE (op) == UNSPEC)
3460 if (GET_CODE (op) != PLUS
3461 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3464 if (GET_CODE (op) == UNSPEC)
3470 /* Return true if OP is a symbolic operand that resolves locally. */
3473 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3475 if (GET_CODE (op) == CONST
3476 && GET_CODE (XEXP (op, 0)) == PLUS
3477 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3478 op = XEXP (XEXP (op, 0), 0);
3480 if (GET_CODE (op) == LABEL_REF)
3483 if (GET_CODE (op) != SYMBOL_REF)
3486 if (SYMBOL_REF_LOCAL_P (op))
3489 /* There is, however, a not insubstantial body of code in the rest of
3490 the compiler that assumes it can just stick the results of
3491 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3492 /* ??? This is a hack. Should update the body of the compiler to
3493 always create a DECL an invoke targetm.encode_section_info. */
3494 if (strncmp (XSTR (op, 0), internal_label_prefix,
3495 internal_label_prefix_len) == 0)
3501 /* Test for various thread-local symbols. */
3504 tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3506 if (GET_CODE (op) != SYMBOL_REF)
3508 return SYMBOL_REF_TLS_MODEL (op);
3512 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3514 if (GET_CODE (op) != SYMBOL_REF)
3516 return SYMBOL_REF_TLS_MODEL (op) == kind;
3520 global_dynamic_symbolic_operand (register rtx op,
3521 enum machine_mode mode ATTRIBUTE_UNUSED)
3523 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3527 local_dynamic_symbolic_operand (register rtx op,
3528 enum machine_mode mode ATTRIBUTE_UNUSED)
3530 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3534 initial_exec_symbolic_operand (register rtx op,
3535 enum machine_mode mode ATTRIBUTE_UNUSED)
3537 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3541 local_exec_symbolic_operand (register rtx op,
3542 enum machine_mode mode ATTRIBUTE_UNUSED)
3544 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3547 /* Test for a valid operand for a call instruction. Don't allow the
3548 arg pointer register or virtual regs since they may decay into
3549 reg + const, which the patterns can't handle. */
3552 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3554 /* Disallow indirect through a virtual register. This leads to
3555 compiler aborts when trying to eliminate them. */
3556 if (GET_CODE (op) == REG
3557 && (op == arg_pointer_rtx
3558 || op == frame_pointer_rtx
3559 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3560 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3563 /* Disallow `call 1234'. Due to varying assembler lameness this
3564 gets either rejected or translated to `call .+1234'. */
3565 if (GET_CODE (op) == CONST_INT)
3568 /* Explicitly allow SYMBOL_REF even if pic. */
3569 if (GET_CODE (op) == SYMBOL_REF)
3572 /* Otherwise we can allow any general_operand in the address. */
3573 return general_operand (op, Pmode);
3576 /* Test for a valid operand for a call instruction. Don't allow the
3577 arg pointer register or virtual regs since they may decay into
3578 reg + const, which the patterns can't handle. */
3581 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3583 /* Disallow indirect through a virtual register. This leads to
3584 compiler aborts when trying to eliminate them. */
3585 if (GET_CODE (op) == REG
3586 && (op == arg_pointer_rtx
3587 || op == frame_pointer_rtx
3588 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3589 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3592 /* Explicitly allow SYMBOL_REF even if pic. */
3593 if (GET_CODE (op) == SYMBOL_REF)
3596 /* Otherwise we can only allow register operands. */
3597 return register_operand (op, Pmode);
3601 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3603 if (GET_CODE (op) == CONST
3604 && GET_CODE (XEXP (op, 0)) == PLUS
3605 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3606 op = XEXP (XEXP (op, 0), 0);
3607 return GET_CODE (op) == SYMBOL_REF;
3610 /* Match exactly zero and one. */
3613 const0_operand (register rtx op, enum machine_mode mode)
3615 return op == CONST0_RTX (mode);
3619 const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3621 return op == const1_rtx;
3624 /* Match 2, 4, or 8. Used for leal multiplicands. */
3627 const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3629 return (GET_CODE (op) == CONST_INT
3630 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3634 const_0_to_3_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3636 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3640 const_0_to_7_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3642 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3646 const_0_to_15_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3648 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3652 const_0_to_255_operand (register rtx op,
3653 enum machine_mode mode ATTRIBUTE_UNUSED)
3655 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3659 /* True if this is a constant appropriate for an increment or decrement. */
3662 incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3664 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3665 registers, since carry flag is not set. */
3666 if (TARGET_PENTIUM4 && !optimize_size)
3668 return op == const1_rtx || op == constm1_rtx;
3671 /* Return nonzero if OP is acceptable as operand of DImode shift
3675 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3678 return nonimmediate_operand (op, mode);
3680 return register_operand (op, mode);
3683 /* Return false if this is the stack pointer, or any other fake
3684 register eliminable to the stack pointer. Otherwise, this is
3687 This is used to prevent esp from being used as an index reg.
3688 Which would only happen in pathological cases. */
3691 reg_no_sp_operand (register rtx op, enum machine_mode mode)
3694 if (GET_CODE (t) == SUBREG)
3696 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3699 return register_operand (op, mode);
3703 mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3705 return MMX_REG_P (op);
3708 /* Return false if this is any eliminable register. Otherwise
3712 general_no_elim_operand (register rtx op, enum machine_mode mode)
3715 if (GET_CODE (t) == SUBREG)
3717 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3718 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3719 || t == virtual_stack_dynamic_rtx)
3722 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3723 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3726 return general_operand (op, mode);
3729 /* Return false if this is any eliminable register. Otherwise
3730 register_operand or const_int. */
3733 nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
3736 if (GET_CODE (t) == SUBREG)
3738 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3739 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3740 || t == virtual_stack_dynamic_rtx)
3743 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3746 /* Return false if this is any eliminable register or stack register,
3747 otherwise work like register_operand. */
3750 index_register_operand (register rtx op, enum machine_mode mode)
3753 if (GET_CODE (t) == SUBREG)
3757 if (t == arg_pointer_rtx
3758 || t == frame_pointer_rtx
3759 || t == virtual_incoming_args_rtx
3760 || t == virtual_stack_vars_rtx
3761 || t == virtual_stack_dynamic_rtx
3762 || REGNO (t) == STACK_POINTER_REGNUM)
3765 return general_operand (op, mode);
3768 /* Return true if op is a Q_REGS class register. */
3771 q_regs_operand (register rtx op, enum machine_mode mode)
3773 if (mode != VOIDmode && GET_MODE (op) != mode)
3775 if (GET_CODE (op) == SUBREG)
3776 op = SUBREG_REG (op);
3777 return ANY_QI_REG_P (op);
3780 /* Return true if op is an flags register. */
3783 flags_reg_operand (register rtx op, enum machine_mode mode)
3785 if (mode != VOIDmode && GET_MODE (op) != mode)
3787 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3790 /* Return true if op is a NON_Q_REGS class register. */
3793 non_q_regs_operand (register rtx op, enum machine_mode mode)
3795 if (mode != VOIDmode && GET_MODE (op) != mode)
3797 if (GET_CODE (op) == SUBREG)
3798 op = SUBREG_REG (op);
3799 return NON_QI_REG_P (op);
3803 zero_extended_scalar_load_operand (rtx op,
3804 enum machine_mode mode ATTRIBUTE_UNUSED)
3807 if (GET_CODE (op) != MEM)
3809 op = maybe_get_pool_constant (op);
3812 if (GET_CODE (op) != CONST_VECTOR)
3815 (GET_MODE_SIZE (GET_MODE (op)) /
3816 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3817 for (n_elts--; n_elts > 0; n_elts--)
3819 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3820 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3826 /* Return 1 when OP is operand acceptable for standard SSE move. */
3828 vector_move_operand (rtx op, enum machine_mode mode)
3830 if (nonimmediate_operand (op, mode))
3832 if (GET_MODE (op) != mode && mode != VOIDmode)
3834 return (op == CONST0_RTX (GET_MODE (op)));
3837 /* Return true if op if a valid address, and does not contain
3838 a segment override. */
3841 no_seg_address_operand (register rtx op, enum machine_mode mode)
3843 struct ix86_address parts;
3845 if (! address_operand (op, mode))
3848 if (! ix86_decompose_address (op, &parts))
3851 return parts.seg == SEG_DEFAULT;
3854 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3857 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3859 enum rtx_code code = GET_CODE (op);
3862 /* Operations supported directly. */
3872 /* These are equivalent to ones above in non-IEEE comparisons. */
3879 return !TARGET_IEEE_FP;
3884 /* Return 1 if OP is a valid comparison operator in valid mode. */
3886 ix86_comparison_operator (register rtx op, enum machine_mode mode)
3888 enum machine_mode inmode;
3889 enum rtx_code code = GET_CODE (op);
3890 if (mode != VOIDmode && GET_MODE (op) != mode)
3892 if (GET_RTX_CLASS (code) != '<')
3894 inmode = GET_MODE (XEXP (op, 0));
3896 if (inmode == CCFPmode || inmode == CCFPUmode)
3898 enum rtx_code second_code, bypass_code;
3899 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3900 return (bypass_code == NIL && second_code == NIL);
3907 if (inmode == CCmode || inmode == CCGCmode
3908 || inmode == CCGOCmode || inmode == CCNOmode)
3911 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3912 if (inmode == CCmode)
3916 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3924 /* Return 1 if OP is a valid comparison operator testing carry flag
3927 ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
3929 enum machine_mode inmode;
3930 enum rtx_code code = GET_CODE (op);
3932 if (mode != VOIDmode && GET_MODE (op) != mode)
3934 if (GET_RTX_CLASS (code) != '<')
3936 inmode = GET_MODE (XEXP (op, 0));
3937 if (GET_CODE (XEXP (op, 0)) != REG
3938 || REGNO (XEXP (op, 0)) != 17
3939 || XEXP (op, 1) != const0_rtx)
3942 if (inmode == CCFPmode || inmode == CCFPUmode)
3944 enum rtx_code second_code, bypass_code;
3946 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3947 if (bypass_code != NIL || second_code != NIL)
3949 code = ix86_fp_compare_code_to_integer (code);
3951 else if (inmode != CCmode)
3956 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3959 fcmov_comparison_operator (register rtx op, enum machine_mode mode)
3961 enum machine_mode inmode;
3962 enum rtx_code code = GET_CODE (op);
3964 if (mode != VOIDmode && GET_MODE (op) != mode)
3966 if (GET_RTX_CLASS (code) != '<')
3968 inmode = GET_MODE (XEXP (op, 0));
3969 if (inmode == CCFPmode || inmode == CCFPUmode)
3971 enum rtx_code second_code, bypass_code;
3973 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3974 if (bypass_code != NIL || second_code != NIL)
3976 code = ix86_fp_compare_code_to_integer (code);
3978 /* i387 supports just limited amount of conditional codes. */
3981 case LTU: case GTU: case LEU: case GEU:
3982 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3985 case ORDERED: case UNORDERED:
3993 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3996 promotable_binary_operator (register rtx op,
3997 enum machine_mode mode ATTRIBUTE_UNUSED)
3999 switch (GET_CODE (op))
4002 /* Modern CPUs have same latency for HImode and SImode multiply,
4003 but 386 and 486 do HImode multiply faster. */
4004 return ix86_tune > PROCESSOR_I486;
4016 /* Nearly general operand, but accept any const_double, since we wish
4017 to be able to drop them into memory rather than have them get pulled
4021 cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
4023 if (mode != VOIDmode && mode != GET_MODE (op))
4025 if (GET_CODE (op) == CONST_DOUBLE)
4027 return general_operand (op, mode);
4030 /* Match an SI or HImode register for a zero_extract. */
4033 ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4036 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4037 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4040 if (!register_operand (op, VOIDmode))
4043 /* Be careful to accept only registers having upper parts. */
4044 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4045 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4048 /* Return 1 if this is a valid binary floating-point operation.
4049 OP is the expression matched, and MODE is its mode. */
4052 binary_fp_operator (register rtx op, enum machine_mode mode)
4054 if (mode != VOIDmode && mode != GET_MODE (op))
4057 switch (GET_CODE (op))
4063 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4071 mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4073 return GET_CODE (op) == MULT;
4077 div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4079 return GET_CODE (op) == DIV;
4083 arith_or_logical_operator (rtx op, enum machine_mode mode)
4085 return ((mode == VOIDmode || GET_MODE (op) == mode)
4086 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4087 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4090 /* Returns 1 if OP is memory operand with a displacement. */
4093 memory_displacement_operand (register rtx op, enum machine_mode mode)
4095 struct ix86_address parts;
4097 if (! memory_operand (op, mode))
4100 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4103 return parts.disp != NULL_RTX;
4106 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4107 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4109 ??? It seems likely that this will only work because cmpsi is an
4110 expander, and no actual insns use this. */
4113 cmpsi_operand (rtx op, enum machine_mode mode)
4115 if (nonimmediate_operand (op, mode))
4118 if (GET_CODE (op) == AND
4119 && GET_MODE (op) == SImode
4120 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4121 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4122 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4123 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4124 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4125 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4131 /* Returns 1 if OP is memory operand that can not be represented by the
4135 long_memory_operand (register rtx op, enum machine_mode mode)
4137 if (! memory_operand (op, mode))
4140 return memory_address_length (op) != 0;
4143 /* Return nonzero if the rtx is known aligned. */
4146 aligned_operand (rtx op, enum machine_mode mode)
4148 struct ix86_address parts;
4150 if (!general_operand (op, mode))
4153 /* Registers and immediate operands are always "aligned". */
4154 if (GET_CODE (op) != MEM)
4157 /* Don't even try to do any aligned optimizations with volatiles. */
4158 if (MEM_VOLATILE_P (op))
4163 /* Pushes and pops are only valid on the stack pointer. */
4164 if (GET_CODE (op) == PRE_DEC
4165 || GET_CODE (op) == POST_INC)
4168 /* Decode the address. */
4169 if (! ix86_decompose_address (op, &parts))
4172 if (parts.base && GET_CODE (parts.base) == SUBREG)
4173 parts.base = SUBREG_REG (parts.base);
4174 if (parts.index && GET_CODE (parts.index) == SUBREG)
4175 parts.index = SUBREG_REG (parts.index);
4177 /* Look for some component that isn't known to be aligned. */
4181 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4186 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4191 if (GET_CODE (parts.disp) != CONST_INT
4192 || (INTVAL (parts.disp) & 3) != 0)
4196 /* Didn't find one -- this must be an aligned address. */
4200 /* Initialize the table of extra 80387 mathematical constants. */
4203 init_ext_80387_constants (void)
4205 static const char * cst[5] =
4207 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4208 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4209 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4210 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4211 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4215 for (i = 0; i < 5; i++)
4217 real_from_string (&ext_80387_constants_table[i], cst[i]);
4218 /* Ensure each constant is rounded to XFmode precision. */
4219 real_convert (&ext_80387_constants_table[i],
4220 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
4221 &ext_80387_constants_table[i]);
4224 ext_80387_constants_init = 1;
4227 /* Return true if the constant is something that can be loaded with
4228 a special instruction. */
4231 standard_80387_constant_p (rtx x)
4233 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4236 if (x == CONST0_RTX (GET_MODE (x)))
4238 if (x == CONST1_RTX (GET_MODE (x)))
4241 /* For XFmode constants, try to find a special 80387 instruction on
4242 those CPUs that benefit from them. */
4243 if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
4244 && x86_ext_80387_constants & TUNEMASK)
4249 if (! ext_80387_constants_init)
4250 init_ext_80387_constants ();
4252 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4253 for (i = 0; i < 5; i++)
4254 if (real_identical (&r, &ext_80387_constants_table[i]))
4261 /* Return the opcode of the special instruction to be used to load
4265 standard_80387_constant_opcode (rtx x)
4267 switch (standard_80387_constant_p (x))
4287 /* Return the CONST_DOUBLE representing the 80387 constant that is
4288 loaded by the specified special instruction. The argument IDX
4289 matches the return value from standard_80387_constant_p. */
4292 standard_80387_constant_rtx (int idx)
4296 if (! ext_80387_constants_init)
4297 init_ext_80387_constants ();
4313 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4314 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode);
4317 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4320 standard_sse_constant_p (rtx x)
4322 if (x == const0_rtx)
4324 return (x == CONST0_RTX (GET_MODE (x)));
4327 /* Returns 1 if OP contains a symbol reference */
4330 symbolic_reference_mentioned_p (rtx op)
4332 register const char *fmt;
4335 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4338 fmt = GET_RTX_FORMAT (GET_CODE (op));
4339 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4345 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4346 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4350 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4357 /* Return 1 if it is appropriate to emit `ret' instructions in the
4358 body of a function. Do this only if the epilogue is simple, needing a
4359 couple of insns. Prior to reloading, we can't tell how many registers
4360 must be saved, so return 0 then. Return 0 if there is no frame
4361 marker to de-allocate.
4363 If NON_SAVING_SETJMP is defined and true, then it is not possible
4364 for the epilogue to be simple, so return 0. This is a special case
4365 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4366 until final, but jump_optimize may need to know sooner if a
4370 ix86_can_use_return_insn_p (void)
4372 struct ix86_frame frame;
4374 #ifdef NON_SAVING_SETJMP
4375 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4379 if (! reload_completed || frame_pointer_needed)
4382 /* Don't allow more than 32 pop, since that's all we can do
4383 with one instruction. */
4384 if (current_function_pops_args
4385 && current_function_args_size >= 32768)
4388 ix86_compute_frame_layout (&frame);
4389 return frame.to_allocate == 0 && frame.nregs == 0;
4392 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4394 x86_64_sign_extended_value (rtx value)
4396 switch (GET_CODE (value))
4398 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4399 to be at least 32 and this all acceptable constants are
4400 represented as CONST_INT. */
4402 if (HOST_BITS_PER_WIDE_INT == 32)
4406 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4407 return trunc_int_for_mode (val, SImode) == val;
4411 /* For certain code models, the symbolic references are known to fit.
4412 in CM_SMALL_PIC model we know it fits if it is local to the shared
4413 library. Don't count TLS SYMBOL_REFs here, since they should fit
4414 only if inside of UNSPEC handled below. */
4416 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4418 /* For certain code models, the code is near as well. */
4420 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4421 || ix86_cmodel == CM_KERNEL);
4423 /* We also may accept the offsetted memory references in certain special
4426 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4427 switch (XINT (XEXP (value, 0), 1))
4429 case UNSPEC_GOTPCREL:
4431 case UNSPEC_GOTNTPOFF:
4437 if (GET_CODE (XEXP (value, 0)) == PLUS)
4439 rtx op1 = XEXP (XEXP (value, 0), 0);
4440 rtx op2 = XEXP (XEXP (value, 0), 1);
4441 HOST_WIDE_INT offset;
4443 if (ix86_cmodel == CM_LARGE)
4445 if (GET_CODE (op2) != CONST_INT)
4447 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4448 switch (GET_CODE (op1))
4451 /* For CM_SMALL assume that latest object is 16MB before
4452 end of 31bits boundary. We may also accept pretty
4453 large negative constants knowing that all objects are
4454 in the positive half of address space. */
4455 if (ix86_cmodel == CM_SMALL
4456 && offset < 16*1024*1024
4457 && trunc_int_for_mode (offset, SImode) == offset)
4459 /* For CM_KERNEL we know that all object resist in the
4460 negative half of 32bits address space. We may not
4461 accept negative offsets, since they may be just off
4462 and we may accept pretty large positive ones. */
4463 if (ix86_cmodel == CM_KERNEL
4465 && trunc_int_for_mode (offset, SImode) == offset)
4469 /* These conditions are similar to SYMBOL_REF ones, just the
4470 constraints for code models differ. */
4471 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4472 && offset < 16*1024*1024
4473 && trunc_int_for_mode (offset, SImode) == offset)
4475 if (ix86_cmodel == CM_KERNEL
4477 && trunc_int_for_mode (offset, SImode) == offset)
4481 switch (XINT (op1, 1))
4486 && trunc_int_for_mode (offset, SImode) == offset)
4500 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4502 x86_64_zero_extended_value (rtx value)
4504 switch (GET_CODE (value))
4507 if (HOST_BITS_PER_WIDE_INT == 32)
4508 return (GET_MODE (value) == VOIDmode
4509 && !CONST_DOUBLE_HIGH (value));
4513 if (HOST_BITS_PER_WIDE_INT == 32)
4514 return INTVAL (value) >= 0;
4516 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4519 /* For certain code models, the symbolic references are known to fit. */
4521 return ix86_cmodel == CM_SMALL;
4523 /* For certain code models, the code is near as well. */
4525 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4527 /* We also may accept the offsetted memory references in certain special
4530 if (GET_CODE (XEXP (value, 0)) == PLUS)
4532 rtx op1 = XEXP (XEXP (value, 0), 0);
4533 rtx op2 = XEXP (XEXP (value, 0), 1);
4535 if (ix86_cmodel == CM_LARGE)
4537 switch (GET_CODE (op1))
4541 /* For small code model we may accept pretty large positive
4542 offsets, since one bit is available for free. Negative
4543 offsets are limited by the size of NULL pointer area
4544 specified by the ABI. */
4545 if (ix86_cmodel == CM_SMALL
4546 && GET_CODE (op2) == CONST_INT
4547 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4548 && (trunc_int_for_mode (INTVAL (op2), SImode)
4551 /* ??? For the kernel, we may accept adjustment of
4552 -0x10000000, since we know that it will just convert
4553 negative address space to positive, but perhaps this
4554 is not worthwhile. */
4557 /* These conditions are similar to SYMBOL_REF ones, just the
4558 constraints for code models differ. */
4559 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4560 && GET_CODE (op2) == CONST_INT
4561 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4562 && (trunc_int_for_mode (INTVAL (op2), SImode)
4576 /* Value should be nonzero if functions must have frame pointers.
4577 Zero means the frame pointer need not be set up (and parms may
4578 be accessed via the stack pointer) in functions that seem suitable. */
4581 ix86_frame_pointer_required (void)
4583 /* If we accessed previous frames, then the generated code expects
4584 to be able to access the saved ebp value in our frame. */
4585 if (cfun->machine->accesses_prev_frame)
4588 /* Several x86 os'es need a frame pointer for other reasons,
4589 usually pertaining to setjmp. */
4590 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4593 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4594 the frame pointer by default. Turn it back on now if we've not
4595 got a leaf function. */
4596 if (TARGET_OMIT_LEAF_FRAME_POINTER
4597 && (!current_function_is_leaf))
4600 if (current_function_profile)
4606 /* Record that the current function accesses previous call frames. */
4609 ix86_setup_frame_addresses (void)
4611 cfun->machine->accesses_prev_frame = 1;
4614 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4615 # define USE_HIDDEN_LINKONCE 1
4617 # define USE_HIDDEN_LINKONCE 0
4620 static int pic_labels_used;
4622 /* Fills in the label name that should be used for a pc thunk for
4623 the given register. */
4626 get_pc_thunk_name (char name[32], unsigned int regno)
4628 if (USE_HIDDEN_LINKONCE)
4629 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4631 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4635 /* This function generates code for -fpic that loads %ebx with
4636 the return address of the caller and then returns. */
4639 ix86_file_end (void)
4644 for (regno = 0; regno < 8; ++regno)
4648 if (! ((pic_labels_used >> regno) & 1))
4651 get_pc_thunk_name (name, regno);
4653 if (USE_HIDDEN_LINKONCE)
4657 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4659 TREE_PUBLIC (decl) = 1;
4660 TREE_STATIC (decl) = 1;
4661 DECL_ONE_ONLY (decl) = 1;
4663 (*targetm.asm_out.unique_section) (decl, 0);
4664 named_section (decl, NULL, 0);
4666 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4667 fputs ("\t.hidden\t", asm_out_file);
4668 assemble_name (asm_out_file, name);
4669 fputc ('\n', asm_out_file);
4670 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4675 ASM_OUTPUT_LABEL (asm_out_file, name);
4678 xops[0] = gen_rtx_REG (SImode, regno);
4679 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4680 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4681 output_asm_insn ("ret", xops);
4684 if (NEED_INDICATE_EXEC_STACK)
4685 file_end_indicate_exec_stack ();
4688 /* Emit code for the SET_GOT patterns. */
4691 output_set_got (rtx dest)
4696 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4698 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4700 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4703 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4705 output_asm_insn ("call\t%a2", xops);
4708 /* Output the "canonical" label name ("Lxx$pb") here too. This
4709 is what will be referred to by the Mach-O PIC subsystem. */
4710 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4712 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4713 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4716 output_asm_insn ("pop{l}\t%0", xops);
4721 get_pc_thunk_name (name, REGNO (dest));
4722 pic_labels_used |= 1 << REGNO (dest);
4724 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4725 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4726 output_asm_insn ("call\t%X2", xops);
4729 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4730 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4731 else if (!TARGET_MACHO)
4732 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4737 /* Generate an "push" pattern for input ARG. */
4742 return gen_rtx_SET (VOIDmode,
4744 gen_rtx_PRE_DEC (Pmode,
4745 stack_pointer_rtx)),
4749 /* Return >= 0 if there is an unused call-clobbered register available
4750 for the entire function. */
4753 ix86_select_alt_pic_regnum (void)
4755 if (current_function_is_leaf && !current_function_profile)
4758 for (i = 2; i >= 0; --i)
4759 if (!regs_ever_live[i])
4763 return INVALID_REGNUM;
4766 /* Return 1 if we need to save REGNO. */
4768 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4770 if (pic_offset_table_rtx
4771 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4772 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4773 || current_function_profile
4774 || current_function_calls_eh_return
4775 || current_function_uses_const_pool))
4777 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4782 if (current_function_calls_eh_return && maybe_eh_return)
4787 unsigned test = EH_RETURN_DATA_REGNO (i);
4788 if (test == INVALID_REGNUM)
4795 return (regs_ever_live[regno]
4796 && !call_used_regs[regno]
4797 && !fixed_regs[regno]
4798 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4801 /* Return number of registers to be saved on the stack. */
4804 ix86_nsaved_regs (void)
4809 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4810 if (ix86_save_reg (regno, true))
4815 /* Return the offset between two registers, one to be eliminated, and the other
4816 its replacement, at the start of a routine. */
4819 ix86_initial_elimination_offset (int from, int to)
4821 struct ix86_frame frame;
4822 ix86_compute_frame_layout (&frame);
4824 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4825 return frame.hard_frame_pointer_offset;
4826 else if (from == FRAME_POINTER_REGNUM
4827 && to == HARD_FRAME_POINTER_REGNUM)
4828 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4831 if (to != STACK_POINTER_REGNUM)
4833 else if (from == ARG_POINTER_REGNUM)
4834 return frame.stack_pointer_offset;
4835 else if (from != FRAME_POINTER_REGNUM)
4838 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4842 /* Fill structure ix86_frame about frame of currently computed function. */
4845 ix86_compute_frame_layout (struct ix86_frame *frame)
4847 HOST_WIDE_INT total_size;
4848 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4850 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4851 HOST_WIDE_INT size = get_frame_size ();
4853 frame->nregs = ix86_nsaved_regs ();
4856 /* During reload iteration the amount of registers saved can change.
4857 Recompute the value as needed. Do not recompute when amount of registers
4858 didn't change as reload does mutiple calls to the function and does not
4859 expect the decision to change within single iteration. */
4861 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4863 int count = frame->nregs;
4865 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4866 /* The fast prologue uses move instead of push to save registers. This
4867 is significantly longer, but also executes faster as modern hardware
4868 can execute the moves in parallel, but can't do that for push/pop.
4870 Be careful about choosing what prologue to emit: When function takes
4871 many instructions to execute we may use slow version as well as in
4872 case function is known to be outside hot spot (this is known with
4873 feedback only). Weight the size of function by number of registers
4874 to save as it is cheap to use one or two push instructions but very
4875 slow to use many of them. */
4877 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4878 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4879 || (flag_branch_probabilities
4880 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4881 cfun->machine->use_fast_prologue_epilogue = false;
4883 cfun->machine->use_fast_prologue_epilogue
4884 = !expensive_function_p (count);
4886 if (TARGET_PROLOGUE_USING_MOVE
4887 && cfun->machine->use_fast_prologue_epilogue)
4888 frame->save_regs_using_mov = true;
4890 frame->save_regs_using_mov = false;
4893 /* Skip return address and saved base pointer. */
4894 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4896 frame->hard_frame_pointer_offset = offset;
4898 /* Do some sanity checking of stack_alignment_needed and
4899 preferred_alignment, since i386 port is the only using those features
4900 that may break easily. */
4902 if (size && !stack_alignment_needed)
4904 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4906 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4908 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4911 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4912 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4914 /* Register save area */
4915 offset += frame->nregs * UNITS_PER_WORD;
4918 if (ix86_save_varrargs_registers)
4920 offset += X86_64_VARARGS_SIZE;
4921 frame->va_arg_size = X86_64_VARARGS_SIZE;
4924 frame->va_arg_size = 0;
4926 /* Align start of frame for local function. */
4927 frame->padding1 = ((offset + stack_alignment_needed - 1)
4928 & -stack_alignment_needed) - offset;
4930 offset += frame->padding1;
4932 /* Frame pointer points here. */
4933 frame->frame_pointer_offset = offset;
4937 /* Add outgoing arguments area. Can be skipped if we eliminated
4938 all the function calls as dead code. */
4939 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4941 offset += current_function_outgoing_args_size;
4942 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4945 frame->outgoing_arguments_size = 0;
4947 /* Align stack boundary. Only needed if we're calling another function
4949 if (!current_function_is_leaf || current_function_calls_alloca)
4950 frame->padding2 = ((offset + preferred_alignment - 1)
4951 & -preferred_alignment) - offset;
4953 frame->padding2 = 0;
4955 offset += frame->padding2;
4957 /* We've reached end of stack frame. */
4958 frame->stack_pointer_offset = offset;
4960 /* Size prologue needs to allocate. */
4961 frame->to_allocate =
4962 (size + frame->padding1 + frame->padding2
4963 + frame->outgoing_arguments_size + frame->va_arg_size);
4965 if (!frame->to_allocate && frame->nregs <= 1)
4966 frame->save_regs_using_mov = false;
4968 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4969 && current_function_is_leaf)
4971 frame->red_zone_size = frame->to_allocate;
4972 if (frame->save_regs_using_mov)
4973 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4974 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4975 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4978 frame->red_zone_size = 0;
4979 frame->to_allocate -= frame->red_zone_size;
4980 frame->stack_pointer_offset -= frame->red_zone_size;
4982 fprintf (stderr, "nregs: %i\n", frame->nregs);
4983 fprintf (stderr, "size: %i\n", size);
4984 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4985 fprintf (stderr, "padding1: %i\n", frame->padding1);
4986 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4987 fprintf (stderr, "padding2: %i\n", frame->padding2);
4988 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4989 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4990 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4991 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4992 frame->hard_frame_pointer_offset);
4993 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4997 /* Emit code to save registers in the prologue. */
5000 ix86_emit_save_regs (void)
5005 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5006 if (ix86_save_reg (regno, true))
5008 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5009 RTX_FRAME_RELATED_P (insn) = 1;
5013 /* Emit code to save registers using MOV insns. First register
5014 is restored from POINTER + OFFSET. */
5016 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5021 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5022 if (ix86_save_reg (regno, true))
5024 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5026 gen_rtx_REG (Pmode, regno));
5027 RTX_FRAME_RELATED_P (insn) = 1;
5028 offset += UNITS_PER_WORD;
5032 /* Expand the prologue into a bunch of separate insns. */
5035 ix86_expand_prologue (void)
5039 struct ix86_frame frame;
5040 HOST_WIDE_INT allocate;
5042 ix86_compute_frame_layout (&frame);
5044 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5045 slower on all targets. Also sdb doesn't like it. */
5047 if (frame_pointer_needed)
5049 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5050 RTX_FRAME_RELATED_P (insn) = 1;
5052 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5053 RTX_FRAME_RELATED_P (insn) = 1;
5056 allocate = frame.to_allocate;
5058 if (!frame.save_regs_using_mov)
5059 ix86_emit_save_regs ();
5061 allocate += frame.nregs * UNITS_PER_WORD;
5063 /* When using red zone we may start register saving before allocating
5064 the stack frame saving one cycle of the prologue. */
5065 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5066 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5067 : stack_pointer_rtx,
5068 -frame.nregs * UNITS_PER_WORD);
5072 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5074 insn = emit_insn (gen_pro_epilogue_adjust_stack
5075 (stack_pointer_rtx, stack_pointer_rtx,
5076 GEN_INT (-allocate)));
5077 RTX_FRAME_RELATED_P (insn) = 1;
5081 /* ??? Is this only valid for Win32? */
5088 arg0 = gen_rtx_REG (SImode, 0);
5089 emit_move_insn (arg0, GEN_INT (allocate));
5091 sym = gen_rtx_MEM (FUNCTION_MODE,
5092 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5093 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5095 CALL_INSN_FUNCTION_USAGE (insn)
5096 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5097 CALL_INSN_FUNCTION_USAGE (insn));
5099 /* Don't allow scheduling pass to move insns across __alloca
5101 emit_insn (gen_blockage (const0_rtx));
5103 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5105 if (!frame_pointer_needed || !frame.to_allocate)
5106 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5108 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5109 -frame.nregs * UNITS_PER_WORD);
5112 pic_reg_used = false;
5113 if (pic_offset_table_rtx
5114 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5115 || current_function_profile))
5117 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5119 if (alt_pic_reg_used != INVALID_REGNUM)
5120 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5122 pic_reg_used = true;
5127 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5129 /* Even with accurate pre-reload life analysis, we can wind up
5130 deleting all references to the pic register after reload.
5131 Consider if cross-jumping unifies two sides of a branch
5132 controlled by a comparison vs the only read from a global.
5133 In which case, allow the set_got to be deleted, though we're
5134 too late to do anything about the ebx save in the prologue. */
5135 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5138 /* Prevent function calls from be scheduled before the call to mcount.
5139 In the pic_reg_used case, make sure that the got load isn't deleted. */
5140 if (current_function_profile)
5141 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5144 /* Emit code to restore saved registers using MOV insns. First register
5145 is restored from POINTER + OFFSET. */
5147 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5151 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5152 if (ix86_save_reg (regno, maybe_eh_return))
5154 emit_move_insn (gen_rtx_REG (Pmode, regno),
5155 adjust_address (gen_rtx_MEM (Pmode, pointer),
5157 offset += UNITS_PER_WORD;
5161 /* Restore function stack, frame, and registers. */
5164 ix86_expand_epilogue (int style)
5167 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5168 struct ix86_frame frame;
5169 HOST_WIDE_INT offset;
5171 ix86_compute_frame_layout (&frame);
5173 /* Calculate start of saved registers relative to ebp. Special care
5174 must be taken for the normal return case of a function using
5175 eh_return: the eax and edx registers are marked as saved, but not
5176 restored along this path. */
5177 offset = frame.nregs;
5178 if (current_function_calls_eh_return && style != 2)
5180 offset *= -UNITS_PER_WORD;
5182 /* If we're only restoring one register and sp is not valid then
5183 using a move instruction to restore the register since it's
5184 less work than reloading sp and popping the register.
5186 The default code result in stack adjustment using add/lea instruction,
5187 while this code results in LEAVE instruction (or discrete equivalent),
5188 so it is profitable in some other cases as well. Especially when there
5189 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5190 and there is exactly one register to pop. This heuristic may need some
5191 tuning in future. */
5192 if ((!sp_valid && frame.nregs <= 1)
5193 || (TARGET_EPILOGUE_USING_MOVE
5194 && cfun->machine->use_fast_prologue_epilogue
5195 && (frame.nregs > 1 || frame.to_allocate))
5196 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5197 || (frame_pointer_needed && TARGET_USE_LEAVE
5198 && cfun->machine->use_fast_prologue_epilogue
5199 && frame.nregs == 1)
5200 || current_function_calls_eh_return)
5202 /* Restore registers. We can use ebp or esp to address the memory
5203 locations. If both are available, default to ebp, since offsets
5204 are known to be small. Only exception is esp pointing directly to the
5205 end of block of saved registers, where we may simplify addressing
5208 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5209 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5210 frame.to_allocate, style == 2);
5212 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5213 offset, style == 2);
5215 /* eh_return epilogues need %ecx added to the stack pointer. */
5218 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5220 if (frame_pointer_needed)
5222 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5223 tmp = plus_constant (tmp, UNITS_PER_WORD);
5224 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5226 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5227 emit_move_insn (hard_frame_pointer_rtx, tmp);
5229 emit_insn (gen_pro_epilogue_adjust_stack
5230 (stack_pointer_rtx, sa, const0_rtx));
5234 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5235 tmp = plus_constant (tmp, (frame.to_allocate
5236 + frame.nregs * UNITS_PER_WORD));
5237 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5240 else if (!frame_pointer_needed)
5241 emit_insn (gen_pro_epilogue_adjust_stack
5242 (stack_pointer_rtx, stack_pointer_rtx,
5243 GEN_INT (frame.to_allocate
5244 + frame.nregs * UNITS_PER_WORD)));
5245 /* If not an i386, mov & pop is faster than "leave". */
5246 else if (TARGET_USE_LEAVE || optimize_size
5247 || !cfun->machine->use_fast_prologue_epilogue)
5248 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5251 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5252 hard_frame_pointer_rtx,
5255 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5257 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5262 /* First step is to deallocate the stack frame so that we can
5263 pop the registers. */
5266 if (!frame_pointer_needed)
5268 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5269 hard_frame_pointer_rtx,
5272 else if (frame.to_allocate)
5273 emit_insn (gen_pro_epilogue_adjust_stack
5274 (stack_pointer_rtx, stack_pointer_rtx,
5275 GEN_INT (frame.to_allocate)));
5277 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5278 if (ix86_save_reg (regno, false))
5281 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5283 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5285 if (frame_pointer_needed)
5287 /* Leave results in shorter dependency chains on CPUs that are
5288 able to grok it fast. */
5289 if (TARGET_USE_LEAVE)
5290 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5291 else if (TARGET_64BIT)
5292 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5294 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5298 /* Sibcall epilogues don't want a return instruction. */
5302 if (current_function_pops_args && current_function_args_size)
5304 rtx popc = GEN_INT (current_function_pops_args);
5306 /* i386 can only pop 64K bytes. If asked to pop more, pop
5307 return address, do explicit add, and jump indirectly to the
5310 if (current_function_pops_args >= 65536)
5312 rtx ecx = gen_rtx_REG (SImode, 2);
5314 /* There are is no "pascal" calling convention in 64bit ABI. */
5318 emit_insn (gen_popsi1 (ecx));
5319 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5320 emit_jump_insn (gen_return_indirect_internal (ecx));
5323 emit_jump_insn (gen_return_pop_internal (popc));
5326 emit_jump_insn (gen_return_internal ());
5329 /* Reset from the function's potential modifications. */
5332 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5333 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5335 if (pic_offset_table_rtx)
5336 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5339 /* Extract the parts of an RTL expression that is a valid memory address
5340 for an instruction. Return 0 if the structure of the address is
5341 grossly off. Return -1 if the address contains ASHIFT, so it is not
5342 strictly valid, but still used for computing length of lea instruction. */
5345 ix86_decompose_address (register rtx addr, struct ix86_address *out)
5347 rtx base = NULL_RTX;
5348 rtx index = NULL_RTX;
5349 rtx disp = NULL_RTX;
5350 HOST_WIDE_INT scale = 1;
5351 rtx scale_rtx = NULL_RTX;
5353 enum ix86_address_seg seg = SEG_DEFAULT;
5355 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5357 else if (GET_CODE (addr) == PLUS)
5367 addends[n++] = XEXP (op, 1);
5370 while (GET_CODE (op) == PLUS);
5375 for (i = n; i >= 0; --i)
5378 switch (GET_CODE (op))
5383 index = XEXP (op, 0);
5384 scale_rtx = XEXP (op, 1);
5388 if (XINT (op, 1) == UNSPEC_TP
5389 && TARGET_TLS_DIRECT_SEG_REFS
5390 && seg == SEG_DEFAULT)
5391 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5420 else if (GET_CODE (addr) == MULT)
5422 index = XEXP (addr, 0); /* index*scale */
5423 scale_rtx = XEXP (addr, 1);
5425 else if (GET_CODE (addr) == ASHIFT)
5429 /* We're called for lea too, which implements ashift on occasion. */
5430 index = XEXP (addr, 0);
5431 tmp = XEXP (addr, 1);
5432 if (GET_CODE (tmp) != CONST_INT)
5434 scale = INTVAL (tmp);
5435 if ((unsigned HOST_WIDE_INT) scale > 3)
5441 disp = addr; /* displacement */
5443 /* Extract the integral value of scale. */
5446 if (GET_CODE (scale_rtx) != CONST_INT)
5448 scale = INTVAL (scale_rtx);
5451 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5452 if (base && index && scale == 1
5453 && (index == arg_pointer_rtx
5454 || index == frame_pointer_rtx
5455 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5462 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5463 if ((base == hard_frame_pointer_rtx
5464 || base == frame_pointer_rtx
5465 || base == arg_pointer_rtx) && !disp)
5468 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5469 Avoid this by transforming to [%esi+0]. */
5470 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5471 && base && !index && !disp
5473 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5476 /* Special case: encode reg+reg instead of reg*2. */
5477 if (!base && index && scale && scale == 2)
5478 base = index, scale = 1;
5480 /* Special case: scaling cannot be encoded without base or displacement. */
5481 if (!base && !disp && index && scale != 1)
5493 /* Return cost of the memory address x.
5494 For i386, it is better to use a complex address than let gcc copy
5495 the address into a reg and make a new pseudo. But not if the address
5496 requires to two regs - that would mean more pseudos with longer
5499 ix86_address_cost (rtx x)
5501 struct ix86_address parts;
5504 if (!ix86_decompose_address (x, &parts))
5507 if (parts.base && GET_CODE (parts.base) == SUBREG)
5508 parts.base = SUBREG_REG (parts.base);
5509 if (parts.index && GET_CODE (parts.index) == SUBREG)
5510 parts.index = SUBREG_REG (parts.index);
5512 /* More complex memory references are better. */
5513 if (parts.disp && parts.disp != const0_rtx)
5515 if (parts.seg != SEG_DEFAULT)
5518 /* Attempt to minimize number of registers in the address. */
5520 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5522 && (!REG_P (parts.index)
5523 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5527 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5529 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5530 && parts.base != parts.index)
5533 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5534 since it's predecode logic can't detect the length of instructions
5535 and it degenerates to vector decoded. Increase cost of such
5536 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5537 to split such addresses or even refuse such addresses at all.
5539 Following addressing modes are affected:
5544 The first and last case may be avoidable by explicitly coding the zero in
5545 memory address, but I don't have AMD-K6 machine handy to check this
5549 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5550 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5551 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5557 /* If X is a machine specific address (i.e. a symbol or label being
5558 referenced as a displacement from the GOT implemented using an
5559 UNSPEC), then return the base term. Otherwise return X. */
5562 ix86_find_base_term (rtx x)
5568 if (GET_CODE (x) != CONST)
5571 if (GET_CODE (term) == PLUS
5572 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5573 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5574 term = XEXP (term, 0);
5575 if (GET_CODE (term) != UNSPEC
5576 || XINT (term, 1) != UNSPEC_GOTPCREL)
5579 term = XVECEXP (term, 0, 0);
5581 if (GET_CODE (term) != SYMBOL_REF
5582 && GET_CODE (term) != LABEL_REF)
5588 term = ix86_delegitimize_address (x);
5590 if (GET_CODE (term) != SYMBOL_REF
5591 && GET_CODE (term) != LABEL_REF)
5597 /* Determine if a given RTX is a valid constant. We already know this
5598 satisfies CONSTANT_P. */
5601 legitimate_constant_p (rtx x)
5605 switch (GET_CODE (x))
5608 /* TLS symbols are not constant. */
5609 if (tls_symbolic_operand (x, Pmode))
5614 inner = XEXP (x, 0);
5616 /* Offsets of TLS symbols are never valid.
5617 Discourage CSE from creating them. */
5618 if (GET_CODE (inner) == PLUS
5619 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5622 if (GET_CODE (inner) == PLUS)
5624 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5626 inner = XEXP (inner, 0);
5629 /* Only some unspecs are valid as "constants". */
5630 if (GET_CODE (inner) == UNSPEC)
5631 switch (XINT (inner, 1))
5635 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5637 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5647 /* Otherwise we handle everything else in the move patterns. */
5651 /* Determine if it's legal to put X into the constant pool. This
5652 is not possible for the address of thread-local symbols, which
5653 is checked above. */
5656 ix86_cannot_force_const_mem (rtx x)
5658 return !legitimate_constant_p (x);
5661 /* Determine if a given RTX is a valid constant address. */
5664 constant_address_p (rtx x)
5666 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5669 /* Nonzero if the constant value X is a legitimate general operand
5670 when generating PIC code. It is given that flag_pic is on and
5671 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5674 legitimate_pic_operand_p (rtx x)
5678 switch (GET_CODE (x))
5681 inner = XEXP (x, 0);
5683 /* Only some unspecs are valid as "constants". */
5684 if (GET_CODE (inner) == UNSPEC)
5685 switch (XINT (inner, 1))
5688 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5696 return legitimate_pic_address_disp_p (x);
5703 /* Determine if a given CONST RTX is a valid memory displacement
5707 legitimate_pic_address_disp_p (register rtx disp)
5711 /* In 64bit mode we can allow direct addresses of symbols and labels
5712 when they are not dynamic symbols. */
5715 /* TLS references should always be enclosed in UNSPEC. */
5716 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5718 if (GET_CODE (disp) == SYMBOL_REF
5719 && ix86_cmodel == CM_SMALL_PIC
5720 && SYMBOL_REF_LOCAL_P (disp))
5722 if (GET_CODE (disp) == LABEL_REF)
5724 if (GET_CODE (disp) == CONST
5725 && GET_CODE (XEXP (disp, 0)) == PLUS)
5727 rtx op0 = XEXP (XEXP (disp, 0), 0);
5728 rtx op1 = XEXP (XEXP (disp, 0), 1);
5730 /* TLS references should always be enclosed in UNSPEC. */
5731 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5733 if (((GET_CODE (op0) == SYMBOL_REF
5734 && ix86_cmodel == CM_SMALL_PIC
5735 && SYMBOL_REF_LOCAL_P (op0))
5736 || GET_CODE (op0) == LABEL_REF)
5737 && GET_CODE (op1) == CONST_INT
5738 && INTVAL (op1) < 16*1024*1024
5739 && INTVAL (op1) >= -16*1024*1024)
5743 if (GET_CODE (disp) != CONST)
5745 disp = XEXP (disp, 0);
5749 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5750 of GOT tables. We should not need these anyway. */
5751 if (GET_CODE (disp) != UNSPEC
5752 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5755 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5756 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5762 if (GET_CODE (disp) == PLUS)
5764 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5766 disp = XEXP (disp, 0);
5770 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5771 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5773 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5774 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5775 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5777 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5778 if (! strcmp (sym_name, "<pic base>"))
5783 if (GET_CODE (disp) != UNSPEC)
5786 switch (XINT (disp, 1))
5791 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5793 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5794 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5795 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5797 case UNSPEC_GOTTPOFF:
5798 case UNSPEC_GOTNTPOFF:
5799 case UNSPEC_INDNTPOFF:
5802 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5804 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5806 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5812 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5813 memory address for an instruction. The MODE argument is the machine mode
5814 for the MEM expression that wants to use this address.
5816 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5817 convert common non-canonical forms to canonical form so that they will
5821 legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
5823 struct ix86_address parts;
5824 rtx base, index, disp;
5825 HOST_WIDE_INT scale;
5826 const char *reason = NULL;
5827 rtx reason_rtx = NULL_RTX;
5829 if (TARGET_DEBUG_ADDR)
5832 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5833 GET_MODE_NAME (mode), strict);
5837 if (ix86_decompose_address (addr, &parts) <= 0)
5839 reason = "decomposition failed";
5844 index = parts.index;
5846 scale = parts.scale;
5848 /* Validate base register.
5850 Don't allow SUBREG's here, it can lead to spill failures when the base
5851 is one word out of a two word structure, which is represented internally
5859 if (GET_CODE (base) == SUBREG)
5860 reg = SUBREG_REG (base);
5864 if (GET_CODE (reg) != REG)
5866 reason = "base is not a register";
5870 if (GET_MODE (base) != Pmode)
5872 reason = "base is not in Pmode";
5876 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5877 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5879 reason = "base is not valid";
5884 /* Validate index register.
5886 Don't allow SUBREG's here, it can lead to spill failures when the index
5887 is one word out of a two word structure, which is represented internally
5895 if (GET_CODE (index) == SUBREG)
5896 reg = SUBREG_REG (index);
5900 if (GET_CODE (reg) != REG)
5902 reason = "index is not a register";
5906 if (GET_MODE (index) != Pmode)
5908 reason = "index is not in Pmode";
5912 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5913 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5915 reason = "index is not valid";
5920 /* Validate scale factor. */
5923 reason_rtx = GEN_INT (scale);
5926 reason = "scale without index";
5930 if (scale != 2 && scale != 4 && scale != 8)
5932 reason = "scale is not a valid multiplier";
5937 /* Validate displacement. */
5942 if (GET_CODE (disp) == CONST
5943 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5944 switch (XINT (XEXP (disp, 0), 1))
5948 case UNSPEC_GOTPCREL:
5951 goto is_legitimate_pic;
5953 case UNSPEC_GOTTPOFF:
5954 case UNSPEC_GOTNTPOFF:
5955 case UNSPEC_INDNTPOFF:
5961 reason = "invalid address unspec";
5965 else if (flag_pic && (SYMBOLIC_CONST (disp)
5967 && !machopic_operand_p (disp)
5972 if (TARGET_64BIT && (index || base))
5974 /* foo@dtpoff(%rX) is ok. */
5975 if (GET_CODE (disp) != CONST
5976 || GET_CODE (XEXP (disp, 0)) != PLUS
5977 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5978 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5979 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5980 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5982 reason = "non-constant pic memory reference";
5986 else if (! legitimate_pic_address_disp_p (disp))
5988 reason = "displacement is an invalid pic construct";
5992 /* This code used to verify that a symbolic pic displacement
5993 includes the pic_offset_table_rtx register.
5995 While this is good idea, unfortunately these constructs may
5996 be created by "adds using lea" optimization for incorrect
6005 This code is nonsensical, but results in addressing
6006 GOT table with pic_offset_table_rtx base. We can't
6007 just refuse it easily, since it gets matched by
6008 "addsi3" pattern, that later gets split to lea in the
6009 case output register differs from input. While this
6010 can be handled by separate addsi pattern for this case
6011 that never results in lea, this seems to be easier and
6012 correct fix for crash to disable this test. */
6014 else if (GET_CODE (disp) != LABEL_REF
6015 && GET_CODE (disp) != CONST_INT
6016 && (GET_CODE (disp) != CONST
6017 || !legitimate_constant_p (disp))
6018 && (GET_CODE (disp) != SYMBOL_REF
6019 || !legitimate_constant_p (disp)))
6021 reason = "displacement is not constant";
6024 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6026 reason = "displacement is out of range";
6031 /* Everything looks valid. */
6032 if (TARGET_DEBUG_ADDR)
6033 fprintf (stderr, "Success.\n");
6037 if (TARGET_DEBUG_ADDR)
6039 fprintf (stderr, "Error: %s\n", reason);
6040 debug_rtx (reason_rtx);
6045 /* Return an unique alias set for the GOT. */
6047 static HOST_WIDE_INT
6048 ix86_GOT_alias_set (void)
6050 static HOST_WIDE_INT set = -1;
6052 set = new_alias_set ();
6056 /* Return a legitimate reference for ORIG (an address) using the
6057 register REG. If REG is 0, a new pseudo is generated.
6059 There are two types of references that must be handled:
6061 1. Global data references must load the address from the GOT, via
6062 the PIC reg. An insn is emitted to do this load, and the reg is
6065 2. Static data references, constant pool addresses, and code labels
6066 compute the address as an offset from the GOT, whose base is in
6067 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6068 differentiate them from global data objects. The returned
6069 address is the PIC reg + an unspec constant.
6071 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6072 reg also appears in the address. */
6075 legitimize_pic_address (rtx orig, rtx reg)
6083 reg = gen_reg_rtx (Pmode);
6084 /* Use the generic Mach-O PIC machinery. */
6085 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6088 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6090 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6092 /* This symbol may be referenced via a displacement from the PIC
6093 base address (@GOTOFF). */
6095 if (reload_in_progress)
6096 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6097 if (GET_CODE (addr) == CONST)
6098 addr = XEXP (addr, 0);
6099 if (GET_CODE (addr) == PLUS)
6101 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6102 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6105 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6106 new = gen_rtx_CONST (Pmode, new);
6107 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6111 emit_move_insn (reg, new);
6115 else if (GET_CODE (addr) == SYMBOL_REF)
6119 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6120 new = gen_rtx_CONST (Pmode, new);
6121 new = gen_rtx_MEM (Pmode, new);
6122 RTX_UNCHANGING_P (new) = 1;
6123 set_mem_alias_set (new, ix86_GOT_alias_set ());
6126 reg = gen_reg_rtx (Pmode);
6127 /* Use directly gen_movsi, otherwise the address is loaded
6128 into register for CSE. We don't want to CSE this addresses,
6129 instead we CSE addresses from the GOT table, so skip this. */
6130 emit_insn (gen_movsi (reg, new));
6135 /* This symbol must be referenced via a load from the
6136 Global Offset Table (@GOT). */
6138 if (reload_in_progress)
6139 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6140 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6141 new = gen_rtx_CONST (Pmode, new);
6142 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6143 new = gen_rtx_MEM (Pmode, new);
6144 RTX_UNCHANGING_P (new) = 1;
6145 set_mem_alias_set (new, ix86_GOT_alias_set ());
6148 reg = gen_reg_rtx (Pmode);
6149 emit_move_insn (reg, new);
6155 if (GET_CODE (addr) == CONST)
6157 addr = XEXP (addr, 0);
6159 /* We must match stuff we generate before. Assume the only
6160 unspecs that can get here are ours. Not that we could do
6161 anything with them anyway... */
6162 if (GET_CODE (addr) == UNSPEC
6163 || (GET_CODE (addr) == PLUS
6164 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6166 if (GET_CODE (addr) != PLUS)
6169 if (GET_CODE (addr) == PLUS)
6171 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6173 /* Check first to see if this is a constant offset from a @GOTOFF
6174 symbol reference. */
6175 if (local_symbolic_operand (op0, Pmode)
6176 && GET_CODE (op1) == CONST_INT)
6180 if (reload_in_progress)
6181 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6182 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6184 new = gen_rtx_PLUS (Pmode, new, op1);
6185 new = gen_rtx_CONST (Pmode, new);
6186 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6190 emit_move_insn (reg, new);
6196 if (INTVAL (op1) < -16*1024*1024
6197 || INTVAL (op1) >= 16*1024*1024)
6198 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6203 base = legitimize_pic_address (XEXP (addr, 0), reg);
6204 new = legitimize_pic_address (XEXP (addr, 1),
6205 base == reg ? NULL_RTX : reg);
6207 if (GET_CODE (new) == CONST_INT)
6208 new = plus_constant (base, INTVAL (new));
6211 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6213 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6214 new = XEXP (new, 1);
6216 new = gen_rtx_PLUS (Pmode, base, new);
6224 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6227 get_thread_pointer (int to_reg)
6231 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6235 reg = gen_reg_rtx (Pmode);
6236 insn = gen_rtx_SET (VOIDmode, reg, tp);
6237 insn = emit_insn (insn);
6242 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6243 false if we expect this to be used for a memory address and true if
6244 we expect to load the address into a register. */
6247 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6249 rtx dest, base, off, pic;
6254 case TLS_MODEL_GLOBAL_DYNAMIC:
6255 dest = gen_reg_rtx (Pmode);
6258 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6261 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6262 insns = get_insns ();
6265 emit_libcall_block (insns, dest, rax, x);
6268 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6271 case TLS_MODEL_LOCAL_DYNAMIC:
6272 base = gen_reg_rtx (Pmode);
6275 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6278 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6279 insns = get_insns ();
6282 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6283 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6284 emit_libcall_block (insns, base, rax, note);
6287 emit_insn (gen_tls_local_dynamic_base_32 (base));
6289 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6290 off = gen_rtx_CONST (Pmode, off);
6292 return gen_rtx_PLUS (Pmode, base, off);
6294 case TLS_MODEL_INITIAL_EXEC:
6298 type = UNSPEC_GOTNTPOFF;
6302 if (reload_in_progress)
6303 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6304 pic = pic_offset_table_rtx;
6305 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6307 else if (!TARGET_GNU_TLS)
6309 pic = gen_reg_rtx (Pmode);
6310 emit_insn (gen_set_got (pic));
6311 type = UNSPEC_GOTTPOFF;
6316 type = UNSPEC_INDNTPOFF;
6319 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6320 off = gen_rtx_CONST (Pmode, off);
6322 off = gen_rtx_PLUS (Pmode, pic, off);
6323 off = gen_rtx_MEM (Pmode, off);
6324 RTX_UNCHANGING_P (off) = 1;
6325 set_mem_alias_set (off, ix86_GOT_alias_set ());
6327 if (TARGET_64BIT || TARGET_GNU_TLS)
6329 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6330 off = force_reg (Pmode, off);
6331 return gen_rtx_PLUS (Pmode, base, off);
6335 base = get_thread_pointer (true);
6336 dest = gen_reg_rtx (Pmode);
6337 emit_insn (gen_subsi3 (dest, base, off));
6341 case TLS_MODEL_LOCAL_EXEC:
6342 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6343 (TARGET_64BIT || TARGET_GNU_TLS)
6344 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6345 off = gen_rtx_CONST (Pmode, off);
6347 if (TARGET_64BIT || TARGET_GNU_TLS)
6349 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6350 return gen_rtx_PLUS (Pmode, base, off);
6354 base = get_thread_pointer (true);
6355 dest = gen_reg_rtx (Pmode);
6356 emit_insn (gen_subsi3 (dest, base, off));
6367 /* Try machine-dependent ways of modifying an illegitimate address
6368 to be legitimate. If we find one, return the new, valid address.
6369 This macro is used in only one place: `memory_address' in explow.c.
6371 OLDX is the address as it was before break_out_memory_refs was called.
6372 In some cases it is useful to look at this to decide what needs to be done.
6374 MODE and WIN are passed so that this macro can use
6375 GO_IF_LEGITIMATE_ADDRESS.
6377 It is always safe for this macro to do nothing. It exists to recognize
6378 opportunities to optimize the output.
6380 For the 80386, we handle X+REG by loading X into a register R and
6381 using R+REG. R will go in a general reg and indexing will be used.
6382 However, if REG is a broken-out memory address or multiplication,
6383 nothing needs to be done because REG can certainly go in a general reg.
6385 When -fpic is used, special handling is needed for symbolic references.
6386 See comments by legitimize_pic_address in i386.c for details. */
6389 legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
6390 enum machine_mode mode)
6395 if (TARGET_DEBUG_ADDR)
6397 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6398 GET_MODE_NAME (mode));
6402 log = tls_symbolic_operand (x, mode);
6404 return legitimize_tls_address (x, log, false);
6406 if (flag_pic && SYMBOLIC_CONST (x))
6407 return legitimize_pic_address (x, 0);
6409 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6410 if (GET_CODE (x) == ASHIFT
6411 && GET_CODE (XEXP (x, 1)) == CONST_INT
6412 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6415 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6416 GEN_INT (1 << log));
6419 if (GET_CODE (x) == PLUS)
6421 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6423 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6424 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6425 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6428 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6429 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6430 GEN_INT (1 << log));
6433 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6434 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6435 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6438 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6439 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6440 GEN_INT (1 << log));
6443 /* Put multiply first if it isn't already. */
6444 if (GET_CODE (XEXP (x, 1)) == MULT)
6446 rtx tmp = XEXP (x, 0);
6447 XEXP (x, 0) = XEXP (x, 1);
6452 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6453 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6454 created by virtual register instantiation, register elimination, and
6455 similar optimizations. */
6456 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6459 x = gen_rtx_PLUS (Pmode,
6460 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6461 XEXP (XEXP (x, 1), 0)),
6462 XEXP (XEXP (x, 1), 1));
6466 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6467 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6468 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6469 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6470 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6471 && CONSTANT_P (XEXP (x, 1)))
6474 rtx other = NULL_RTX;
6476 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6478 constant = XEXP (x, 1);
6479 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6481 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6483 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6484 other = XEXP (x, 1);
6492 x = gen_rtx_PLUS (Pmode,
6493 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6494 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6495 plus_constant (other, INTVAL (constant)));
6499 if (changed && legitimate_address_p (mode, x, FALSE))
6502 if (GET_CODE (XEXP (x, 0)) == MULT)
6505 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6508 if (GET_CODE (XEXP (x, 1)) == MULT)
6511 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6515 && GET_CODE (XEXP (x, 1)) == REG
6516 && GET_CODE (XEXP (x, 0)) == REG)
6519 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6522 x = legitimize_pic_address (x, 0);
6525 if (changed && legitimate_address_p (mode, x, FALSE))
6528 if (GET_CODE (XEXP (x, 0)) == REG)
6530 register rtx temp = gen_reg_rtx (Pmode);
6531 register rtx val = force_operand (XEXP (x, 1), temp);
6533 emit_move_insn (temp, val);
6539 else if (GET_CODE (XEXP (x, 1)) == REG)
6541 register rtx temp = gen_reg_rtx (Pmode);
6542 register rtx val = force_operand (XEXP (x, 0), temp);
6544 emit_move_insn (temp, val);
6554 /* Print an integer constant expression in assembler syntax. Addition
6555 and subtraction are the only arithmetic that may appear in these
6556 expressions. FILE is the stdio stream to write to, X is the rtx, and
6557 CODE is the operand print code from the output string. */
6560 output_pic_addr_const (FILE *file, rtx x, int code)
6564 switch (GET_CODE (x))
6574 assemble_name (file, XSTR (x, 0));
6575 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6576 fputs ("@PLT", file);
6583 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6584 assemble_name (asm_out_file, buf);
6588 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6592 /* This used to output parentheses around the expression,
6593 but that does not work on the 386 (either ATT or BSD assembler). */
6594 output_pic_addr_const (file, XEXP (x, 0), code);
6598 if (GET_MODE (x) == VOIDmode)
6600 /* We can use %d if the number is <32 bits and positive. */
6601 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6602 fprintf (file, "0x%lx%08lx",
6603 (unsigned long) CONST_DOUBLE_HIGH (x),
6604 (unsigned long) CONST_DOUBLE_LOW (x));
6606 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6609 /* We can't handle floating point constants;
6610 PRINT_OPERAND must handle them. */
6611 output_operand_lossage ("floating constant misused");
6615 /* Some assemblers need integer constants to appear first. */
6616 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6618 output_pic_addr_const (file, XEXP (x, 0), code);
6620 output_pic_addr_const (file, XEXP (x, 1), code);
6622 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6624 output_pic_addr_const (file, XEXP (x, 1), code);
6626 output_pic_addr_const (file, XEXP (x, 0), code);
6634 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6635 output_pic_addr_const (file, XEXP (x, 0), code);
6637 output_pic_addr_const (file, XEXP (x, 1), code);
6639 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6643 if (XVECLEN (x, 0) != 1)
6645 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6646 switch (XINT (x, 1))
6649 fputs ("@GOT", file);
6652 fputs ("@GOTOFF", file);
6654 case UNSPEC_GOTPCREL:
6655 fputs ("@GOTPCREL(%rip)", file);
6657 case UNSPEC_GOTTPOFF:
6658 /* FIXME: This might be @TPOFF in Sun ld too. */
6659 fputs ("@GOTTPOFF", file);
6662 fputs ("@TPOFF", file);
6666 fputs ("@TPOFF", file);
6668 fputs ("@NTPOFF", file);
6671 fputs ("@DTPOFF", file);
6673 case UNSPEC_GOTNTPOFF:
6675 fputs ("@GOTTPOFF(%rip)", file);
6677 fputs ("@GOTNTPOFF", file);
6679 case UNSPEC_INDNTPOFF:
6680 fputs ("@INDNTPOFF", file);
6683 output_operand_lossage ("invalid UNSPEC as operand");
6689 output_operand_lossage ("invalid expression as operand");
6693 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6694 We need to handle our special PIC relocations. */
6697 i386_dwarf_output_addr_const (FILE *file, rtx x)
6700 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6704 fprintf (file, "%s", ASM_LONG);
6707 output_pic_addr_const (file, x, '\0');
6709 output_addr_const (file, x);
6713 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6714 We need to emit DTP-relative relocations. */
6717 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6719 fputs (ASM_LONG, file);
6720 output_addr_const (file, x);
6721 fputs ("@DTPOFF", file);
6727 fputs (", 0", file);
6734 /* In the name of slightly smaller debug output, and to cater to
6735 general assembler losage, recognize PIC+GOTOFF and turn it back
6736 into a direct symbol reference. */
6739 ix86_delegitimize_address (rtx orig_x)
6743 if (GET_CODE (x) == MEM)
6748 if (GET_CODE (x) != CONST
6749 || GET_CODE (XEXP (x, 0)) != UNSPEC
6750 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6751 || GET_CODE (orig_x) != MEM)
6753 return XVECEXP (XEXP (x, 0), 0, 0);
6756 if (GET_CODE (x) != PLUS
6757 || GET_CODE (XEXP (x, 1)) != CONST)
6760 if (GET_CODE (XEXP (x, 0)) == REG
6761 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6762 /* %ebx + GOT/GOTOFF */
6764 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6766 /* %ebx + %reg * scale + GOT/GOTOFF */
6768 if (GET_CODE (XEXP (y, 0)) == REG
6769 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6771 else if (GET_CODE (XEXP (y, 1)) == REG
6772 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6776 if (GET_CODE (y) != REG
6777 && GET_CODE (y) != MULT
6778 && GET_CODE (y) != ASHIFT)
6784 x = XEXP (XEXP (x, 1), 0);
6785 if (GET_CODE (x) == UNSPEC
6786 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6787 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6790 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6791 return XVECEXP (x, 0, 0);
6794 if (GET_CODE (x) == PLUS
6795 && GET_CODE (XEXP (x, 0)) == UNSPEC
6796 && GET_CODE (XEXP (x, 1)) == CONST_INT
6797 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6798 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6799 && GET_CODE (orig_x) != MEM)))
6801 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6803 return gen_rtx_PLUS (Pmode, y, x);
6811 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6816 if (mode == CCFPmode || mode == CCFPUmode)
6818 enum rtx_code second_code, bypass_code;
6819 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6820 if (bypass_code != NIL || second_code != NIL)
6822 code = ix86_fp_compare_code_to_integer (code);
6826 code = reverse_condition (code);
6837 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6842 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6843 Those same assemblers have the same but opposite losage on cmov. */
6846 suffix = fp ? "nbe" : "a";
6849 if (mode == CCNOmode || mode == CCGOCmode)
6851 else if (mode == CCmode || mode == CCGCmode)
6862 if (mode == CCNOmode || mode == CCGOCmode)
6864 else if (mode == CCmode || mode == CCGCmode)
6873 suffix = fp ? "nb" : "ae";
6876 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6886 suffix = fp ? "u" : "p";
6889 suffix = fp ? "nu" : "np";
6894 fputs (suffix, file);
6898 print_reg (rtx x, int code, FILE *file)
6900 if (REGNO (x) == ARG_POINTER_REGNUM
6901 || REGNO (x) == FRAME_POINTER_REGNUM
6902 || REGNO (x) == FLAGS_REG
6903 || REGNO (x) == FPSR_REG)
6906 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6909 if (code == 'w' || MMX_REG_P (x))
6911 else if (code == 'b')
6913 else if (code == 'k')
6915 else if (code == 'q')
6917 else if (code == 'y')
6919 else if (code == 'h')
6922 code = GET_MODE_SIZE (GET_MODE (x));
6924 /* Irritatingly, AMD extended registers use different naming convention
6925 from the normal registers. */
6926 if (REX_INT_REG_P (x))
6933 error ("extended registers have no high halves");
6936 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6939 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6942 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6945 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6948 error ("unsupported operand size for extended register");
6956 if (STACK_TOP_P (x))
6958 fputs ("st(0)", file);
6965 if (! ANY_FP_REG_P (x))
6966 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6970 fputs (hi_reg_name[REGNO (x)], file);
6973 fputs (qi_reg_name[REGNO (x)], file);
6976 fputs (qi_high_reg_name[REGNO (x)], file);
6983 /* Locate some local-dynamic symbol still in use by this function
6984 so that we can print its name in some tls_local_dynamic_base
6988 get_some_local_dynamic_name (void)
6992 if (cfun->machine->some_ld_name)
6993 return cfun->machine->some_ld_name;
6995 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6997 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6998 return cfun->machine->some_ld_name;
7004 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7008 if (GET_CODE (x) == SYMBOL_REF
7009 && local_dynamic_symbolic_operand (x, Pmode))
7011 cfun->machine->some_ld_name = XSTR (x, 0);
7019 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7020 C -- print opcode suffix for set/cmov insn.
7021 c -- like C, but print reversed condition
7022 F,f -- likewise, but for floating-point.
7023 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7025 R -- print the prefix for register names.
7026 z -- print the opcode suffix for the size of the current operand.
7027 * -- print a star (in certain assembler syntax)
7028 A -- print an absolute memory reference.
7029 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7030 s -- print a shift double count, followed by the assemblers argument
7032 b -- print the QImode name of the register for the indicated operand.
7033 %b0 would print %al if operands[0] is reg 0.
7034 w -- likewise, print the HImode name of the register.
7035 k -- likewise, print the SImode name of the register.
7036 q -- likewise, print the DImode name of the register.
7037 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7038 y -- print "st(0)" instead of "st" as a register.
7039 D -- print condition for SSE cmp instruction.
7040 P -- if PIC, print an @PLT suffix.
7041 X -- don't print any sort of PIC '@' suffix for a symbol.
7042 & -- print some in-use local-dynamic symbol name.
7046 print_operand (FILE *file, rtx x, int code)
7053 if (ASSEMBLER_DIALECT == ASM_ATT)
7058 assemble_name (file, get_some_local_dynamic_name ());
7062 if (ASSEMBLER_DIALECT == ASM_ATT)
7064 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7066 /* Intel syntax. For absolute addresses, registers should not
7067 be surrounded by braces. */
7068 if (GET_CODE (x) != REG)
7071 PRINT_OPERAND (file, x, 0);
7079 PRINT_OPERAND (file, x, 0);
7084 if (ASSEMBLER_DIALECT == ASM_ATT)
7089 if (ASSEMBLER_DIALECT == ASM_ATT)
7094 if (ASSEMBLER_DIALECT == ASM_ATT)
7099 if (ASSEMBLER_DIALECT == ASM_ATT)
7104 if (ASSEMBLER_DIALECT == ASM_ATT)
7109 if (ASSEMBLER_DIALECT == ASM_ATT)
7114 /* 387 opcodes don't get size suffixes if the operands are
7116 if (STACK_REG_P (x))
7119 /* Likewise if using Intel opcodes. */
7120 if (ASSEMBLER_DIALECT == ASM_INTEL)
7123 /* This is the size of op from size of operand. */
7124 switch (GET_MODE_SIZE (GET_MODE (x)))
7127 #ifdef HAVE_GAS_FILDS_FISTS
7133 if (GET_MODE (x) == SFmode)
7148 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7150 #ifdef GAS_MNEMONICS
7176 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7178 PRINT_OPERAND (file, x, 0);
7184 /* Little bit of braindamage here. The SSE compare instructions
7185 does use completely different names for the comparisons that the
7186 fp conditional moves. */
7187 switch (GET_CODE (x))
7202 fputs ("unord", file);
7206 fputs ("neq", file);
7210 fputs ("nlt", file);
7214 fputs ("nle", file);
7217 fputs ("ord", file);
7225 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7226 if (ASSEMBLER_DIALECT == ASM_ATT)
7228 switch (GET_MODE (x))
7230 case HImode: putc ('w', file); break;
7232 case SFmode: putc ('l', file); break;
7234 case DFmode: putc ('q', file); break;
7242 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7245 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7246 if (ASSEMBLER_DIALECT == ASM_ATT)
7249 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7252 /* Like above, but reverse condition */
7254 /* Check to see if argument to %c is really a constant
7255 and not a condition code which needs to be reversed. */
7256 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7258 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7261 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7264 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7265 if (ASSEMBLER_DIALECT == ASM_ATT)
7268 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7274 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7277 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7280 int pred_val = INTVAL (XEXP (x, 0));
7282 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7283 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7285 int taken = pred_val > REG_BR_PROB_BASE / 2;
7286 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7288 /* Emit hints only in the case default branch prediction
7289 heuristics would fail. */
7290 if (taken != cputaken)
7292 /* We use 3e (DS) prefix for taken branches and
7293 2e (CS) prefix for not taken branches. */
7295 fputs ("ds ; ", file);
7297 fputs ("cs ; ", file);
7304 output_operand_lossage ("invalid operand code `%c'", code);
7308 if (GET_CODE (x) == REG)
7310 PRINT_REG (x, code, file);
7313 else if (GET_CODE (x) == MEM)
7315 /* No `byte ptr' prefix for call instructions. */
7316 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7319 switch (GET_MODE_SIZE (GET_MODE (x)))
7321 case 1: size = "BYTE"; break;
7322 case 2: size = "WORD"; break;
7323 case 4: size = "DWORD"; break;
7324 case 8: size = "QWORD"; break;
7325 case 12: size = "XWORD"; break;
7326 case 16: size = "XMMWORD"; break;
7331 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7334 else if (code == 'w')
7336 else if (code == 'k')
7340 fputs (" PTR ", file);
7344 /* Avoid (%rip) for call operands. */
7345 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7346 && GET_CODE (x) != CONST_INT)
7347 output_addr_const (file, x);
7348 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7349 output_operand_lossage ("invalid constraints for operand");
7354 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7359 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7360 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7362 if (ASSEMBLER_DIALECT == ASM_ATT)
7364 fprintf (file, "0x%lx", l);
7367 /* These float cases don't actually occur as immediate operands. */
7368 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7372 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7373 fprintf (file, "%s", dstr);
7376 else if (GET_CODE (x) == CONST_DOUBLE
7377 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7381 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7382 fprintf (file, "%s", dstr);
7389 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7391 if (ASSEMBLER_DIALECT == ASM_ATT)
7394 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7395 || GET_CODE (x) == LABEL_REF)
7397 if (ASSEMBLER_DIALECT == ASM_ATT)
7400 fputs ("OFFSET FLAT:", file);
7403 if (GET_CODE (x) == CONST_INT)
7404 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7406 output_pic_addr_const (file, x, code);
7408 output_addr_const (file, x);
7412 /* Print a memory operand whose address is ADDR. */
7415 print_operand_address (FILE *file, register rtx addr)
7417 struct ix86_address parts;
7418 rtx base, index, disp;
7421 if (! ix86_decompose_address (addr, &parts))
7425 index = parts.index;
7427 scale = parts.scale;
7435 if (USER_LABEL_PREFIX[0] == 0)
7437 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7443 if (!base && !index)
7445 /* Displacement only requires special attention. */
7447 if (GET_CODE (disp) == CONST_INT)
7449 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7451 if (USER_LABEL_PREFIX[0] == 0)
7453 fputs ("ds:", file);
7455 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7458 output_pic_addr_const (file, disp, 0);
7460 output_addr_const (file, disp);
7462 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7464 && ((GET_CODE (disp) == SYMBOL_REF
7465 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7466 || GET_CODE (disp) == LABEL_REF
7467 || (GET_CODE (disp) == CONST
7468 && GET_CODE (XEXP (disp, 0)) == PLUS
7469 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7470 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7471 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7472 fputs ("(%rip)", file);
7476 if (ASSEMBLER_DIALECT == ASM_ATT)
7481 output_pic_addr_const (file, disp, 0);
7482 else if (GET_CODE (disp) == LABEL_REF)
7483 output_asm_label (disp);
7485 output_addr_const (file, disp);
7490 PRINT_REG (base, 0, file);
7494 PRINT_REG (index, 0, file);
7496 fprintf (file, ",%d", scale);
7502 rtx offset = NULL_RTX;
7506 /* Pull out the offset of a symbol; print any symbol itself. */
7507 if (GET_CODE (disp) == CONST
7508 && GET_CODE (XEXP (disp, 0)) == PLUS
7509 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7511 offset = XEXP (XEXP (disp, 0), 1);
7512 disp = gen_rtx_CONST (VOIDmode,
7513 XEXP (XEXP (disp, 0), 0));
7517 output_pic_addr_const (file, disp, 0);
7518 else if (GET_CODE (disp) == LABEL_REF)
7519 output_asm_label (disp);
7520 else if (GET_CODE (disp) == CONST_INT)
7523 output_addr_const (file, disp);
7529 PRINT_REG (base, 0, file);
7532 if (INTVAL (offset) >= 0)
7534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7538 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7545 PRINT_REG (index, 0, file);
7547 fprintf (file, "*%d", scale);
7555 output_addr_const_extra (FILE *file, rtx x)
7559 if (GET_CODE (x) != UNSPEC)
7562 op = XVECEXP (x, 0, 0);
7563 switch (XINT (x, 1))
7565 case UNSPEC_GOTTPOFF:
7566 output_addr_const (file, op);
7567 /* FIXME: This might be @TPOFF in Sun ld. */
7568 fputs ("@GOTTPOFF", file);
7571 output_addr_const (file, op);
7572 fputs ("@TPOFF", file);
7575 output_addr_const (file, op);
7577 fputs ("@TPOFF", file);
7579 fputs ("@NTPOFF", file);
7582 output_addr_const (file, op);
7583 fputs ("@DTPOFF", file);
7585 case UNSPEC_GOTNTPOFF:
7586 output_addr_const (file, op);
7588 fputs ("@GOTTPOFF(%rip)", file);
7590 fputs ("@GOTNTPOFF", file);
7592 case UNSPEC_INDNTPOFF:
7593 output_addr_const (file, op);
7594 fputs ("@INDNTPOFF", file);
7604 /* Split one or more DImode RTL references into pairs of SImode
7605 references. The RTL can be REG, offsettable MEM, integer constant, or
7606 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7607 split and "num" is its length. lo_half and hi_half are output arrays
7608 that parallel "operands". */
7611 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7615 rtx op = operands[num];
7617 /* simplify_subreg refuse to split volatile memory addresses,
7618 but we still have to handle it. */
7619 if (GET_CODE (op) == MEM)
7621 lo_half[num] = adjust_address (op, SImode, 0);
7622 hi_half[num] = adjust_address (op, SImode, 4);
7626 lo_half[num] = simplify_gen_subreg (SImode, op,
7627 GET_MODE (op) == VOIDmode
7628 ? DImode : GET_MODE (op), 0);
7629 hi_half[num] = simplify_gen_subreg (SImode, op,
7630 GET_MODE (op) == VOIDmode
7631 ? DImode : GET_MODE (op), 4);
7635 /* Split one or more TImode RTL references into pairs of SImode
7636 references. The RTL can be REG, offsettable MEM, integer constant, or
7637 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7638 split and "num" is its length. lo_half and hi_half are output arrays
7639 that parallel "operands". */
7642 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7646 rtx op = operands[num];
7648 /* simplify_subreg refuse to split volatile memory addresses, but we
7649 still have to handle it. */
7650 if (GET_CODE (op) == MEM)
7652 lo_half[num] = adjust_address (op, DImode, 0);
7653 hi_half[num] = adjust_address (op, DImode, 8);
7657 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7658 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7663 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7664 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7665 is the expression of the binary operation. The output may either be
7666 emitted here, or returned to the caller, like all output_* functions.
7668 There is no guarantee that the operands are the same mode, as they
7669 might be within FLOAT or FLOAT_EXTEND expressions. */
7671 #ifndef SYSV386_COMPAT
7672 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7673 wants to fix the assemblers because that causes incompatibility
7674 with gcc. No-one wants to fix gcc because that causes
7675 incompatibility with assemblers... You can use the option of
7676 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7677 #define SYSV386_COMPAT 1
7681 output_387_binary_op (rtx insn, rtx *operands)
7683 static char buf[30];
7686 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7688 #ifdef ENABLE_CHECKING
7689 /* Even if we do not want to check the inputs, this documents input
7690 constraints. Which helps in understanding the following code. */
7691 if (STACK_REG_P (operands[0])
7692 && ((REG_P (operands[1])
7693 && REGNO (operands[0]) == REGNO (operands[1])
7694 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7695 || (REG_P (operands[2])
7696 && REGNO (operands[0]) == REGNO (operands[2])
7697 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7698 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7704 switch (GET_CODE (operands[3]))
7707 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7708 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7716 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7717 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7725 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7726 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7734 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7735 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7749 if (GET_MODE (operands[0]) == SFmode)
7750 strcat (buf, "ss\t{%2, %0|%0, %2}");
7752 strcat (buf, "sd\t{%2, %0|%0, %2}");
7757 switch (GET_CODE (operands[3]))
7761 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7763 rtx temp = operands[2];
7764 operands[2] = operands[1];
7768 /* know operands[0] == operands[1]. */
7770 if (GET_CODE (operands[2]) == MEM)
7776 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7778 if (STACK_TOP_P (operands[0]))
7779 /* How is it that we are storing to a dead operand[2]?
7780 Well, presumably operands[1] is dead too. We can't
7781 store the result to st(0) as st(0) gets popped on this
7782 instruction. Instead store to operands[2] (which I
7783 think has to be st(1)). st(1) will be popped later.
7784 gcc <= 2.8.1 didn't have this check and generated
7785 assembly code that the Unixware assembler rejected. */
7786 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7788 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7792 if (STACK_TOP_P (operands[0]))
7793 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7795 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7800 if (GET_CODE (operands[1]) == MEM)
7806 if (GET_CODE (operands[2]) == MEM)
7812 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7815 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7816 derived assemblers, confusingly reverse the direction of
7817 the operation for fsub{r} and fdiv{r} when the
7818 destination register is not st(0). The Intel assembler
7819 doesn't have this brain damage. Read !SYSV386_COMPAT to
7820 figure out what the hardware really does. */
7821 if (STACK_TOP_P (operands[0]))
7822 p = "{p\t%0, %2|rp\t%2, %0}";
7824 p = "{rp\t%2, %0|p\t%0, %2}";
7826 if (STACK_TOP_P (operands[0]))
7827 /* As above for fmul/fadd, we can't store to st(0). */
7828 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7830 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7835 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7838 if (STACK_TOP_P (operands[0]))
7839 p = "{rp\t%0, %1|p\t%1, %0}";
7841 p = "{p\t%1, %0|rp\t%0, %1}";
7843 if (STACK_TOP_P (operands[0]))
7844 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7846 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7851 if (STACK_TOP_P (operands[0]))
7853 if (STACK_TOP_P (operands[1]))
7854 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7856 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7859 else if (STACK_TOP_P (operands[1]))
7862 p = "{\t%1, %0|r\t%0, %1}";
7864 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7870 p = "{r\t%2, %0|\t%0, %2}";
7872 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7885 /* Output code to initialize control word copies used by
7886 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7887 is set to control word rounding downwards. */
7889 emit_i387_cw_initialization (rtx normal, rtx round_down)
7891 rtx reg = gen_reg_rtx (HImode);
7893 emit_insn (gen_x86_fnstcw_1 (normal));
7894 emit_move_insn (reg, normal);
7895 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7897 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7899 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7900 emit_move_insn (round_down, reg);
7903 /* Output code for INSN to convert a float to a signed int. OPERANDS
7904 are the insn operands. The output may be [HSD]Imode and the input
7905 operand may be [SDX]Fmode. */
7908 output_fix_trunc (rtx insn, rtx *operands)
7910 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7911 int dimode_p = GET_MODE (operands[0]) == DImode;
7913 /* Jump through a hoop or two for DImode, since the hardware has no
7914 non-popping instruction. We used to do this a different way, but
7915 that was somewhat fragile and broke with post-reload splitters. */
7916 if (dimode_p && !stack_top_dies)
7917 output_asm_insn ("fld\t%y1", operands);
7919 if (!STACK_TOP_P (operands[1]))
7922 if (GET_CODE (operands[0]) != MEM)
7925 output_asm_insn ("fldcw\t%3", operands);
7926 if (stack_top_dies || dimode_p)
7927 output_asm_insn ("fistp%z0\t%0", operands);
7929 output_asm_insn ("fist%z0\t%0", operands);
7930 output_asm_insn ("fldcw\t%2", operands);
7935 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7936 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7937 when fucom should be used. */
7940 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7943 rtx cmp_op0 = operands[0];
7944 rtx cmp_op1 = operands[1];
7945 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7950 cmp_op1 = operands[2];
7954 if (GET_MODE (operands[0]) == SFmode)
7956 return "ucomiss\t{%1, %0|%0, %1}";
7958 return "comiss\t{%1, %0|%0, %1}";
7961 return "ucomisd\t{%1, %0|%0, %1}";
7963 return "comisd\t{%1, %0|%0, %1}";
7966 if (! STACK_TOP_P (cmp_op0))
7969 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7971 if (STACK_REG_P (cmp_op1)
7973 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7974 && REGNO (cmp_op1) != FIRST_STACK_REG)
7976 /* If both the top of the 387 stack dies, and the other operand
7977 is also a stack register that dies, then this must be a
7978 `fcompp' float compare */
7982 /* There is no double popping fcomi variant. Fortunately,
7983 eflags is immune from the fstp's cc clobbering. */
7985 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7987 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7995 return "fucompp\n\tfnstsw\t%0";
7997 return "fcompp\n\tfnstsw\t%0";
8010 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8012 static const char * const alt[24] =
8024 "fcomi\t{%y1, %0|%0, %y1}",
8025 "fcomip\t{%y1, %0|%0, %y1}",
8026 "fucomi\t{%y1, %0|%0, %y1}",
8027 "fucomip\t{%y1, %0|%0, %y1}",
8034 "fcom%z2\t%y2\n\tfnstsw\t%0",
8035 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8036 "fucom%z2\t%y2\n\tfnstsw\t%0",
8037 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8039 "ficom%z2\t%y2\n\tfnstsw\t%0",
8040 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8048 mask = eflags_p << 3;
8049 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8050 mask |= unordered_p << 1;
8051 mask |= stack_top_dies;
8064 ix86_output_addr_vec_elt (FILE *file, int value)
8066 const char *directive = ASM_LONG;
8071 directive = ASM_QUAD;
8077 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8081 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8084 fprintf (file, "%s%s%d-%s%d\n",
8085 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8086 else if (HAVE_AS_GOTOFF_IN_DATA)
8087 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8089 else if (TARGET_MACHO)
8091 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8092 machopic_output_function_base_name (file);
8093 fprintf(file, "\n");
8097 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8098 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8101 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8105 ix86_expand_clear (rtx dest)
8109 /* We play register width games, which are only valid after reload. */
8110 if (!reload_completed)
8113 /* Avoid HImode and its attendant prefix byte. */
8114 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8115 dest = gen_rtx_REG (SImode, REGNO (dest));
8117 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8119 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8120 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8122 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8123 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8129 /* X is an unchanging MEM. If it is a constant pool reference, return
8130 the constant pool rtx, else NULL. */
8133 maybe_get_pool_constant (rtx x)
8135 x = ix86_delegitimize_address (XEXP (x, 0));
8137 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8138 return get_pool_constant (x);
8144 ix86_expand_move (enum machine_mode mode, rtx operands[])
8146 int strict = (reload_in_progress || reload_completed);
8148 enum tls_model model;
8153 model = tls_symbolic_operand (op1, Pmode);
8156 op1 = legitimize_tls_address (op1, model, true);
8157 op1 = force_operand (op1, op0);
8162 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8167 rtx temp = ((reload_in_progress
8168 || ((op0 && GET_CODE (op0) == REG)
8170 ? op0 : gen_reg_rtx (Pmode));
8171 op1 = machopic_indirect_data_reference (op1, temp);
8172 op1 = machopic_legitimize_pic_address (op1, mode,
8173 temp == op1 ? 0 : temp);
8175 else if (MACHOPIC_INDIRECT)
8176 op1 = machopic_indirect_data_reference (op1, 0);
8180 if (GET_CODE (op0) == MEM)
8181 op1 = force_reg (Pmode, op1);
8185 if (GET_CODE (temp) != REG)
8186 temp = gen_reg_rtx (Pmode);
8187 temp = legitimize_pic_address (op1, temp);
8192 #endif /* TARGET_MACHO */
8196 if (GET_CODE (op0) == MEM
8197 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8198 || !push_operand (op0, mode))
8199 && GET_CODE (op1) == MEM)
8200 op1 = force_reg (mode, op1);
8202 if (push_operand (op0, mode)
8203 && ! general_no_elim_operand (op1, mode))
8204 op1 = copy_to_mode_reg (mode, op1);
8206 /* Force large constants in 64bit compilation into register
8207 to get them CSEed. */
8208 if (TARGET_64BIT && mode == DImode
8209 && immediate_operand (op1, mode)
8210 && !x86_64_zero_extended_value (op1)
8211 && !register_operand (op0, mode)
8212 && optimize && !reload_completed && !reload_in_progress)
8213 op1 = copy_to_mode_reg (mode, op1);
8215 if (FLOAT_MODE_P (mode))
8217 /* If we are loading a floating point constant to a register,
8218 force the value to memory now, since we'll get better code
8219 out the back end. */
8223 else if (GET_CODE (op1) == CONST_DOUBLE)
8225 op1 = validize_mem (force_const_mem (mode, op1));
8226 if (!register_operand (op0, mode))
8228 rtx temp = gen_reg_rtx (mode);
8229 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8230 emit_move_insn (op0, temp);
8237 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8241 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8243 /* Force constants other than zero into memory. We do not know how
8244 the instructions used to build constants modify the upper 64 bits
8245 of the register, once we have that information we may be able
8246 to handle some of them more efficiently. */
8247 if ((reload_in_progress | reload_completed) == 0
8248 && register_operand (operands[0], mode)
8249 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8250 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8252 /* Make operand1 a register if it isn't already. */
8254 && !register_operand (operands[0], mode)
8255 && !register_operand (operands[1], mode))
8257 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8258 emit_move_insn (operands[0], temp);
8262 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8265 /* Attempt to expand a binary operator. Make the expansion closer to the
8266 actual machine, then just general_operand, which will allow 3 separate
8267 memory references (one output, two input) in a single insn. */
8270 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8273 int matching_memory;
8274 rtx src1, src2, dst, op, clob;
8280 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8281 if (GET_RTX_CLASS (code) == 'c'
8282 && (rtx_equal_p (dst, src2)
8283 || immediate_operand (src1, mode)))
8290 /* If the destination is memory, and we do not have matching source
8291 operands, do things in registers. */
8292 matching_memory = 0;
8293 if (GET_CODE (dst) == MEM)
8295 if (rtx_equal_p (dst, src1))
8296 matching_memory = 1;
8297 else if (GET_RTX_CLASS (code) == 'c'
8298 && rtx_equal_p (dst, src2))
8299 matching_memory = 2;
8301 dst = gen_reg_rtx (mode);
8304 /* Both source operands cannot be in memory. */
8305 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8307 if (matching_memory != 2)
8308 src2 = force_reg (mode, src2);
8310 src1 = force_reg (mode, src1);
8313 /* If the operation is not commutable, source 1 cannot be a constant
8314 or non-matching memory. */
8315 if ((CONSTANT_P (src1)
8316 || (!matching_memory && GET_CODE (src1) == MEM))
8317 && GET_RTX_CLASS (code) != 'c')
8318 src1 = force_reg (mode, src1);
8320 /* If optimizing, copy to regs to improve CSE */
8321 if (optimize && ! no_new_pseudos)
8323 if (GET_CODE (dst) == MEM)
8324 dst = gen_reg_rtx (mode);
8325 if (GET_CODE (src1) == MEM)
8326 src1 = force_reg (mode, src1);
8327 if (GET_CODE (src2) == MEM)
8328 src2 = force_reg (mode, src2);
8331 /* Emit the instruction. */
8333 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8334 if (reload_in_progress)
8336 /* Reload doesn't know about the flags register, and doesn't know that
8337 it doesn't want to clobber it. We can only do this with PLUS. */
8344 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8345 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8348 /* Fix up the destination if needed. */
8349 if (dst != operands[0])
8350 emit_move_insn (operands[0], dst);
8353 /* Return TRUE or FALSE depending on whether the binary operator meets the
8354 appropriate constraints. */
8357 ix86_binary_operator_ok (enum rtx_code code,
8358 enum machine_mode mode ATTRIBUTE_UNUSED,
8361 /* Both source operands cannot be in memory. */
8362 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8364 /* If the operation is not commutable, source 1 cannot be a constant. */
8365 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8367 /* If the destination is memory, we must have a matching source operand. */
8368 if (GET_CODE (operands[0]) == MEM
8369 && ! (rtx_equal_p (operands[0], operands[1])
8370 || (GET_RTX_CLASS (code) == 'c'
8371 && rtx_equal_p (operands[0], operands[2]))))
8373 /* If the operation is not commutable and the source 1 is memory, we must
8374 have a matching destination. */
8375 if (GET_CODE (operands[1]) == MEM
8376 && GET_RTX_CLASS (code) != 'c'
8377 && ! rtx_equal_p (operands[0], operands[1]))
8382 /* Attempt to expand a unary operator. Make the expansion closer to the
8383 actual machine, then just general_operand, which will allow 2 separate
8384 memory references (one output, one input) in a single insn. */
8387 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8390 int matching_memory;
8391 rtx src, dst, op, clob;
8396 /* If the destination is memory, and we do not have matching source
8397 operands, do things in registers. */
8398 matching_memory = 0;
8399 if (GET_CODE (dst) == MEM)
8401 if (rtx_equal_p (dst, src))
8402 matching_memory = 1;
8404 dst = gen_reg_rtx (mode);
8407 /* When source operand is memory, destination must match. */
8408 if (!matching_memory && GET_CODE (src) == MEM)
8409 src = force_reg (mode, src);
8411 /* If optimizing, copy to regs to improve CSE */
8412 if (optimize && ! no_new_pseudos)
8414 if (GET_CODE (dst) == MEM)
8415 dst = gen_reg_rtx (mode);
8416 if (GET_CODE (src) == MEM)
8417 src = force_reg (mode, src);
8420 /* Emit the instruction. */
8422 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8423 if (reload_in_progress || code == NOT)
8425 /* Reload doesn't know about the flags register, and doesn't know that
8426 it doesn't want to clobber it. */
8433 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8434 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8437 /* Fix up the destination if needed. */
8438 if (dst != operands[0])
8439 emit_move_insn (operands[0], dst);
8442 /* Return TRUE or FALSE depending on whether the unary operator meets the
8443 appropriate constraints. */
8446 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8447 enum machine_mode mode ATTRIBUTE_UNUSED,
8448 rtx operands[2] ATTRIBUTE_UNUSED)
8450 /* If one of operands is memory, source and destination must match. */
8451 if ((GET_CODE (operands[0]) == MEM
8452 || GET_CODE (operands[1]) == MEM)
8453 && ! rtx_equal_p (operands[0], operands[1]))
8458 /* Return TRUE or FALSE depending on whether the first SET in INSN
8459 has source and destination with matching CC modes, and that the
8460 CC mode is at least as constrained as REQ_MODE. */
8463 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8466 enum machine_mode set_mode;
8468 set = PATTERN (insn);
8469 if (GET_CODE (set) == PARALLEL)
8470 set = XVECEXP (set, 0, 0);
8471 if (GET_CODE (set) != SET)
8473 if (GET_CODE (SET_SRC (set)) != COMPARE)
8476 set_mode = GET_MODE (SET_DEST (set));
8480 if (req_mode != CCNOmode
8481 && (req_mode != CCmode
8482 || XEXP (SET_SRC (set), 1) != const0_rtx))
8486 if (req_mode == CCGCmode)
8490 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8494 if (req_mode == CCZmode)
8504 return (GET_MODE (SET_SRC (set)) == set_mode);
8507 /* Generate insn patterns to do an integer compare of OPERANDS. */
8510 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8512 enum machine_mode cmpmode;
8515 cmpmode = SELECT_CC_MODE (code, op0, op1);
8516 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8518 /* This is very simple, but making the interface the same as in the
8519 FP case makes the rest of the code easier. */
8520 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8521 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8523 /* Return the test that should be put into the flags user, i.e.
8524 the bcc, scc, or cmov instruction. */
8525 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8528 /* Figure out whether to use ordered or unordered fp comparisons.
8529 Return the appropriate mode to use. */
8532 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8534 /* ??? In order to make all comparisons reversible, we do all comparisons
8535 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8536 all forms trapping and nontrapping comparisons, we can make inequality
8537 comparisons trapping again, since it results in better code when using
8538 FCOM based compares. */
8539 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8543 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8545 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8546 return ix86_fp_compare_mode (code);
8549 /* Only zero flag is needed. */
8551 case NE: /* ZF!=0 */
8553 /* Codes needing carry flag. */
8554 case GEU: /* CF=0 */
8555 case GTU: /* CF=0 & ZF=0 */
8556 case LTU: /* CF=1 */
8557 case LEU: /* CF=1 | ZF=1 */
8559 /* Codes possibly doable only with sign flag when
8560 comparing against zero. */
8561 case GE: /* SF=OF or SF=0 */
8562 case LT: /* SF<>OF or SF=1 */
8563 if (op1 == const0_rtx)
8566 /* For other cases Carry flag is not required. */
8568 /* Codes doable only with sign flag when comparing
8569 against zero, but we miss jump instruction for it
8570 so we need to use relational tests against overflow
8571 that thus needs to be zero. */
8572 case GT: /* ZF=0 & SF=OF */
8573 case LE: /* ZF=1 | SF<>OF */
8574 if (op1 == const0_rtx)
8578 /* strcmp pattern do (use flags) and combine may ask us for proper
8587 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8590 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8592 enum rtx_code swapped_code = swap_condition (code);
8593 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8594 || (ix86_fp_comparison_cost (swapped_code)
8595 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8598 /* Swap, force into registers, or otherwise massage the two operands
8599 to a fp comparison. The operands are updated in place; the new
8600 comparison code is returned. */
8602 static enum rtx_code
8603 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8605 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8606 rtx op0 = *pop0, op1 = *pop1;
8607 enum machine_mode op_mode = GET_MODE (op0);
8608 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8610 /* All of the unordered compare instructions only work on registers.
8611 The same is true of the XFmode compare instructions. The same is
8612 true of the fcomi compare instructions. */
8615 && (fpcmp_mode == CCFPUmode
8616 || op_mode == XFmode
8617 || op_mode == TFmode
8618 || ix86_use_fcomi_compare (code)))
8620 op0 = force_reg (op_mode, op0);
8621 op1 = force_reg (op_mode, op1);
8625 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8626 things around if they appear profitable, otherwise force op0
8629 if (standard_80387_constant_p (op0) == 0
8630 || (GET_CODE (op0) == MEM
8631 && ! (standard_80387_constant_p (op1) == 0
8632 || GET_CODE (op1) == MEM)))
8635 tmp = op0, op0 = op1, op1 = tmp;
8636 code = swap_condition (code);
8639 if (GET_CODE (op0) != REG)
8640 op0 = force_reg (op_mode, op0);
8642 if (CONSTANT_P (op1))
8644 if (standard_80387_constant_p (op1))
8645 op1 = force_reg (op_mode, op1);
8647 op1 = validize_mem (force_const_mem (op_mode, op1));
8651 /* Try to rearrange the comparison to make it cheaper. */
8652 if (ix86_fp_comparison_cost (code)
8653 > ix86_fp_comparison_cost (swap_condition (code))
8654 && (GET_CODE (op1) == REG || !no_new_pseudos))
8657 tmp = op0, op0 = op1, op1 = tmp;
8658 code = swap_condition (code);
8659 if (GET_CODE (op0) != REG)
8660 op0 = force_reg (op_mode, op0);
8668 /* Convert comparison codes we use to represent FP comparison to integer
8669 code that will result in proper branch. Return UNKNOWN if no such code
8671 static enum rtx_code
8672 ix86_fp_compare_code_to_integer (enum rtx_code code)
8701 /* Split comparison code CODE into comparisons we can do using branch
8702 instructions. BYPASS_CODE is comparison code for branch that will
8703 branch around FIRST_CODE and SECOND_CODE. If some of branches
8704 is not required, set value to NIL.
8705 We never require more than two branches. */
8707 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8708 enum rtx_code *first_code,
8709 enum rtx_code *second_code)
8715 /* The fcomi comparison sets flags as follows:
8725 case GT: /* GTU - CF=0 & ZF=0 */
8726 case GE: /* GEU - CF=0 */
8727 case ORDERED: /* PF=0 */
8728 case UNORDERED: /* PF=1 */
8729 case UNEQ: /* EQ - ZF=1 */
8730 case UNLT: /* LTU - CF=1 */
8731 case UNLE: /* LEU - CF=1 | ZF=1 */
8732 case LTGT: /* EQ - ZF=0 */
8734 case LT: /* LTU - CF=1 - fails on unordered */
8736 *bypass_code = UNORDERED;
8738 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8740 *bypass_code = UNORDERED;
8742 case EQ: /* EQ - ZF=1 - fails on unordered */
8744 *bypass_code = UNORDERED;
8746 case NE: /* NE - ZF=0 - fails on unordered */
8748 *second_code = UNORDERED;
8750 case UNGE: /* GEU - CF=0 - fails on unordered */
8752 *second_code = UNORDERED;
8754 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8756 *second_code = UNORDERED;
8761 if (!TARGET_IEEE_FP)
8768 /* Return cost of comparison done fcom + arithmetics operations on AX.
8769 All following functions do use number of instructions as a cost metrics.
8770 In future this should be tweaked to compute bytes for optimize_size and
8771 take into account performance of various instructions on various CPUs. */
8773 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8775 if (!TARGET_IEEE_FP)
8777 /* The cost of code output by ix86_expand_fp_compare. */
8805 /* Return cost of comparison done using fcomi operation.
8806 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8808 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8810 enum rtx_code bypass_code, first_code, second_code;
8811 /* Return arbitrarily high cost when instruction is not supported - this
8812 prevents gcc from using it. */
8815 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8816 return (bypass_code != NIL || second_code != NIL) + 2;
8819 /* Return cost of comparison done using sahf operation.
8820 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8822 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8824 enum rtx_code bypass_code, first_code, second_code;
8825 /* Return arbitrarily high cost when instruction is not preferred - this
8826 avoids gcc from using it. */
8827 if (!TARGET_USE_SAHF && !optimize_size)
8829 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8830 return (bypass_code != NIL || second_code != NIL) + 3;
8833 /* Compute cost of the comparison done using any method.
8834 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8836 ix86_fp_comparison_cost (enum rtx_code code)
8838 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8841 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8842 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8844 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8845 if (min > sahf_cost)
8847 if (min > fcomi_cost)
8852 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8855 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8856 rtx *second_test, rtx *bypass_test)
8858 enum machine_mode fpcmp_mode, intcmp_mode;
8860 int cost = ix86_fp_comparison_cost (code);
8861 enum rtx_code bypass_code, first_code, second_code;
8863 fpcmp_mode = ix86_fp_compare_mode (code);
8864 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8867 *second_test = NULL_RTX;
8869 *bypass_test = NULL_RTX;
8871 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8873 /* Do fcomi/sahf based test when profitable. */
8874 if ((bypass_code == NIL || bypass_test)
8875 && (second_code == NIL || second_test)
8876 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8880 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8881 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8887 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8888 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8890 scratch = gen_reg_rtx (HImode);
8891 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8892 emit_insn (gen_x86_sahf_1 (scratch));
8895 /* The FP codes work out to act like unsigned. */
8896 intcmp_mode = fpcmp_mode;
8898 if (bypass_code != NIL)
8899 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8900 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8902 if (second_code != NIL)
8903 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8904 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8909 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8910 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8911 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8913 scratch = gen_reg_rtx (HImode);
8914 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8916 /* In the unordered case, we have to check C2 for NaN's, which
8917 doesn't happen to work out to anything nice combination-wise.
8918 So do some bit twiddling on the value we've got in AH to come
8919 up with an appropriate set of condition codes. */
8921 intcmp_mode = CCNOmode;
8926 if (code == GT || !TARGET_IEEE_FP)
8928 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8933 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8934 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8935 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8936 intcmp_mode = CCmode;
8942 if (code == LT && TARGET_IEEE_FP)
8944 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8945 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8946 intcmp_mode = CCmode;
8951 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8957 if (code == GE || !TARGET_IEEE_FP)
8959 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8964 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8965 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8972 if (code == LE && TARGET_IEEE_FP)
8974 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8975 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8976 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8977 intcmp_mode = CCmode;
8982 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8988 if (code == EQ && TARGET_IEEE_FP)
8990 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8991 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8992 intcmp_mode = CCmode;
8997 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9004 if (code == NE && TARGET_IEEE_FP)
9006 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9007 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9013 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9019 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9023 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9032 /* Return the test that should be put into the flags user, i.e.
9033 the bcc, scc, or cmov instruction. */
9034 return gen_rtx_fmt_ee (code, VOIDmode,
9035 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9040 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9043 op0 = ix86_compare_op0;
9044 op1 = ix86_compare_op1;
9047 *second_test = NULL_RTX;
9049 *bypass_test = NULL_RTX;
9051 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9052 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9053 second_test, bypass_test);
9055 ret = ix86_expand_int_compare (code, op0, op1);
9060 /* Return true if the CODE will result in nontrivial jump sequence. */
9062 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9064 enum rtx_code bypass_code, first_code, second_code;
9067 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9068 return bypass_code != NIL || second_code != NIL;
9072 ix86_expand_branch (enum rtx_code code, rtx label)
9076 switch (GET_MODE (ix86_compare_op0))
9082 tmp = ix86_expand_compare (code, NULL, NULL);
9083 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9084 gen_rtx_LABEL_REF (VOIDmode, label),
9086 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9096 enum rtx_code bypass_code, first_code, second_code;
9098 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9101 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9103 /* Check whether we will use the natural sequence with one jump. If
9104 so, we can expand jump early. Otherwise delay expansion by
9105 creating compound insn to not confuse optimizers. */
9106 if (bypass_code == NIL && second_code == NIL
9109 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9110 gen_rtx_LABEL_REF (VOIDmode, label),
9115 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9116 ix86_compare_op0, ix86_compare_op1);
9117 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9118 gen_rtx_LABEL_REF (VOIDmode, label),
9120 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9122 use_fcomi = ix86_use_fcomi_compare (code);
9123 vec = rtvec_alloc (3 + !use_fcomi);
9124 RTVEC_ELT (vec, 0) = tmp;
9126 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9128 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9131 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9133 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9141 /* Expand DImode branch into multiple compare+branch. */
9143 rtx lo[2], hi[2], label2;
9144 enum rtx_code code1, code2, code3;
9146 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9148 tmp = ix86_compare_op0;
9149 ix86_compare_op0 = ix86_compare_op1;
9150 ix86_compare_op1 = tmp;
9151 code = swap_condition (code);
9153 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9154 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9156 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9157 avoid two branches. This costs one extra insn, so disable when
9158 optimizing for size. */
9160 if ((code == EQ || code == NE)
9162 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9167 if (hi[1] != const0_rtx)
9168 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9169 NULL_RTX, 0, OPTAB_WIDEN);
9172 if (lo[1] != const0_rtx)
9173 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9174 NULL_RTX, 0, OPTAB_WIDEN);
9176 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9177 NULL_RTX, 0, OPTAB_WIDEN);
9179 ix86_compare_op0 = tmp;
9180 ix86_compare_op1 = const0_rtx;
9181 ix86_expand_branch (code, label);
9185 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9186 op1 is a constant and the low word is zero, then we can just
9187 examine the high word. */
9189 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9192 case LT: case LTU: case GE: case GEU:
9193 ix86_compare_op0 = hi[0];
9194 ix86_compare_op1 = hi[1];
9195 ix86_expand_branch (code, label);
9201 /* Otherwise, we need two or three jumps. */
9203 label2 = gen_label_rtx ();
9206 code2 = swap_condition (code);
9207 code3 = unsigned_condition (code);
9211 case LT: case GT: case LTU: case GTU:
9214 case LE: code1 = LT; code2 = GT; break;
9215 case GE: code1 = GT; code2 = LT; break;
9216 case LEU: code1 = LTU; code2 = GTU; break;
9217 case GEU: code1 = GTU; code2 = LTU; break;
9219 case EQ: code1 = NIL; code2 = NE; break;
9220 case NE: code2 = NIL; break;
9228 * if (hi(a) < hi(b)) goto true;
9229 * if (hi(a) > hi(b)) goto false;
9230 * if (lo(a) < lo(b)) goto true;
9234 ix86_compare_op0 = hi[0];
9235 ix86_compare_op1 = hi[1];
9238 ix86_expand_branch (code1, label);
9240 ix86_expand_branch (code2, label2);
9242 ix86_compare_op0 = lo[0];
9243 ix86_compare_op1 = lo[1];
9244 ix86_expand_branch (code3, label);
9247 emit_label (label2);
9256 /* Split branch based on floating point condition. */
9258 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9259 rtx target1, rtx target2, rtx tmp)
9262 rtx label = NULL_RTX;
9264 int bypass_probability = -1, second_probability = -1, probability = -1;
9267 if (target2 != pc_rtx)
9270 code = reverse_condition_maybe_unordered (code);
9275 condition = ix86_expand_fp_compare (code, op1, op2,
9276 tmp, &second, &bypass);
9278 if (split_branch_probability >= 0)
9280 /* Distribute the probabilities across the jumps.
9281 Assume the BYPASS and SECOND to be always test
9283 probability = split_branch_probability;
9285 /* Value of 1 is low enough to make no need for probability
9286 to be updated. Later we may run some experiments and see
9287 if unordered values are more frequent in practice. */
9289 bypass_probability = 1;
9291 second_probability = 1;
9293 if (bypass != NULL_RTX)
9295 label = gen_label_rtx ();
9296 i = emit_jump_insn (gen_rtx_SET
9298 gen_rtx_IF_THEN_ELSE (VOIDmode,
9300 gen_rtx_LABEL_REF (VOIDmode,
9303 if (bypass_probability >= 0)
9305 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9306 GEN_INT (bypass_probability),
9309 i = emit_jump_insn (gen_rtx_SET
9311 gen_rtx_IF_THEN_ELSE (VOIDmode,
9312 condition, target1, target2)));
9313 if (probability >= 0)
9315 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9316 GEN_INT (probability),
9318 if (second != NULL_RTX)
9320 i = emit_jump_insn (gen_rtx_SET
9322 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9324 if (second_probability >= 0)
9326 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9327 GEN_INT (second_probability),
9330 if (label != NULL_RTX)
9335 ix86_expand_setcc (enum rtx_code code, rtx dest)
9337 rtx ret, tmp, tmpreg, equiv;
9338 rtx second_test, bypass_test;
9340 if (GET_MODE (ix86_compare_op0) == DImode
9342 return 0; /* FAIL */
9344 if (GET_MODE (dest) != QImode)
9347 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9348 PUT_MODE (ret, QImode);
9353 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9354 if (bypass_test || second_test)
9356 rtx test = second_test;
9358 rtx tmp2 = gen_reg_rtx (QImode);
9365 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9367 PUT_MODE (test, QImode);
9368 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9371 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9373 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9376 /* Attach a REG_EQUAL note describing the comparison result. */
9377 equiv = simplify_gen_relational (code, QImode,
9378 GET_MODE (ix86_compare_op0),
9379 ix86_compare_op0, ix86_compare_op1);
9380 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9382 return 1; /* DONE */
9385 /* Expand comparison setting or clearing carry flag. Return true when successful
9386 and set pop for the operation. */
9388 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9390 enum machine_mode mode =
9391 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9393 /* Do not handle DImode compares that go trought special path. Also we can't
9394 deal with FP compares yet. This is possible to add. */
9395 if ((mode == DImode && !TARGET_64BIT))
9397 if (FLOAT_MODE_P (mode))
9399 rtx second_test = NULL, bypass_test = NULL;
9400 rtx compare_op, compare_seq;
9402 /* Shortcut: following common codes never translate into carry flag compares. */
9403 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9404 || code == ORDERED || code == UNORDERED)
9407 /* These comparisons require zero flag; swap operands so they won't. */
9408 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9414 code = swap_condition (code);
9417 /* Try to expand the comparison and verify that we end up with carry flag
9418 based comparison. This is fails to be true only when we decide to expand
9419 comparison using arithmetic that is not too common scenario. */
9421 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9422 &second_test, &bypass_test);
9423 compare_seq = get_insns ();
9426 if (second_test || bypass_test)
9428 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9429 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9430 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9432 code = GET_CODE (compare_op);
9433 if (code != LTU && code != GEU)
9435 emit_insn (compare_seq);
9439 if (!INTEGRAL_MODE_P (mode))
9447 /* Convert a==0 into (unsigned)a<1. */
9450 if (op1 != const0_rtx)
9453 code = (code == EQ ? LTU : GEU);
9456 /* Convert a>b into b<a or a>=b-1. */
9459 if (GET_CODE (op1) == CONST_INT)
9461 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9462 /* Bail out on overflow. We still can swap operands but that
9463 would force loading of the constant into register. */
9464 if (op1 == const0_rtx
9465 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9467 code = (code == GTU ? GEU : LTU);
9474 code = (code == GTU ? LTU : GEU);
9478 /* Convert a>=0 into (unsigned)a<0x80000000. */
9481 if (mode == DImode || op1 != const0_rtx)
9483 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9484 code = (code == LT ? GEU : LTU);
9488 if (mode == DImode || op1 != constm1_rtx)
9490 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9491 code = (code == LE ? GEU : LTU);
9497 /* Swapping operands may cause constant to appear as first operand. */
9498 if (!nonimmediate_operand (op0, VOIDmode))
9502 op0 = force_reg (mode, op0);
9504 ix86_compare_op0 = op0;
9505 ix86_compare_op1 = op1;
9506 *pop = ix86_expand_compare (code, NULL, NULL);
9507 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9513 ix86_expand_int_movcc (rtx operands[])
9515 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9516 rtx compare_seq, compare_op;
9517 rtx second_test, bypass_test;
9518 enum machine_mode mode = GET_MODE (operands[0]);
9519 bool sign_bit_compare_p = false;;
9522 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9523 compare_seq = get_insns ();
9526 compare_code = GET_CODE (compare_op);
9528 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9529 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9530 sign_bit_compare_p = true;
9532 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9533 HImode insns, we'd be swallowed in word prefix ops. */
9535 if ((mode != HImode || TARGET_FAST_PREFIX)
9536 && (mode != DImode || TARGET_64BIT)
9537 && GET_CODE (operands[2]) == CONST_INT
9538 && GET_CODE (operands[3]) == CONST_INT)
9540 rtx out = operands[0];
9541 HOST_WIDE_INT ct = INTVAL (operands[2]);
9542 HOST_WIDE_INT cf = INTVAL (operands[3]);
9546 /* Sign bit compares are better done using shifts than we do by using
9548 if (sign_bit_compare_p
9549 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9550 ix86_compare_op1, &compare_op))
9552 /* Detect overlap between destination and compare sources. */
9555 if (!sign_bit_compare_p)
9559 compare_code = GET_CODE (compare_op);
9561 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9562 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9565 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9568 /* To simplify rest of code, restrict to the GEU case. */
9569 if (compare_code == LTU)
9571 HOST_WIDE_INT tmp = ct;
9574 compare_code = reverse_condition (compare_code);
9575 code = reverse_condition (code);
9580 PUT_CODE (compare_op,
9581 reverse_condition_maybe_unordered
9582 (GET_CODE (compare_op)));
9584 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9588 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9589 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9590 tmp = gen_reg_rtx (mode);
9593 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9595 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9599 if (code == GT || code == GE)
9600 code = reverse_condition (code);
9603 HOST_WIDE_INT tmp = ct;
9608 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9609 ix86_compare_op1, VOIDmode, 0, -1);
9622 tmp = expand_simple_binop (mode, PLUS,
9624 copy_rtx (tmp), 1, OPTAB_DIRECT);
9635 tmp = expand_simple_binop (mode, IOR,
9637 copy_rtx (tmp), 1, OPTAB_DIRECT);
9639 else if (diff == -1 && ct)
9649 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9651 tmp = expand_simple_binop (mode, PLUS,
9652 copy_rtx (tmp), GEN_INT (cf),
9653 copy_rtx (tmp), 1, OPTAB_DIRECT);
9661 * andl cf - ct, dest
9671 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9674 tmp = expand_simple_binop (mode, AND,
9676 gen_int_mode (cf - ct, mode),
9677 copy_rtx (tmp), 1, OPTAB_DIRECT);
9679 tmp = expand_simple_binop (mode, PLUS,
9680 copy_rtx (tmp), GEN_INT (ct),
9681 copy_rtx (tmp), 1, OPTAB_DIRECT);
9684 if (!rtx_equal_p (tmp, out))
9685 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9687 return 1; /* DONE */
9693 tmp = ct, ct = cf, cf = tmp;
9695 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9697 /* We may be reversing unordered compare to normal compare, that
9698 is not valid in general (we may convert non-trapping condition
9699 to trapping one), however on i386 we currently emit all
9700 comparisons unordered. */
9701 compare_code = reverse_condition_maybe_unordered (compare_code);
9702 code = reverse_condition_maybe_unordered (code);
9706 compare_code = reverse_condition (compare_code);
9707 code = reverse_condition (code);
9712 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9713 && GET_CODE (ix86_compare_op1) == CONST_INT)
9715 if (ix86_compare_op1 == const0_rtx
9716 && (code == LT || code == GE))
9717 compare_code = code;
9718 else if (ix86_compare_op1 == constm1_rtx)
9722 else if (code == GT)
9727 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9728 if (compare_code != NIL
9729 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9730 && (cf == -1 || ct == -1))
9732 /* If lea code below could be used, only optimize
9733 if it results in a 2 insn sequence. */
9735 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9736 || diff == 3 || diff == 5 || diff == 9)
9737 || (compare_code == LT && ct == -1)
9738 || (compare_code == GE && cf == -1))
9741 * notl op1 (if necessary)
9749 code = reverse_condition (code);
9752 out = emit_store_flag (out, code, ix86_compare_op0,
9753 ix86_compare_op1, VOIDmode, 0, -1);
9755 out = expand_simple_binop (mode, IOR,
9757 out, 1, OPTAB_DIRECT);
9758 if (out != operands[0])
9759 emit_move_insn (operands[0], out);
9761 return 1; /* DONE */
9766 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9767 || diff == 3 || diff == 5 || diff == 9)
9768 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9769 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9775 * lea cf(dest*(ct-cf)),dest
9779 * This also catches the degenerate setcc-only case.
9785 out = emit_store_flag (out, code, ix86_compare_op0,
9786 ix86_compare_op1, VOIDmode, 0, 1);
9789 /* On x86_64 the lea instruction operates on Pmode, so we need
9790 to get arithmetics done in proper mode to match. */
9792 tmp = copy_rtx (out);
9796 out1 = copy_rtx (out);
9797 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9801 tmp = gen_rtx_PLUS (mode, tmp, out1);
9807 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9810 if (!rtx_equal_p (tmp, out))
9813 out = force_operand (tmp, copy_rtx (out));
9815 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9817 if (!rtx_equal_p (out, operands[0]))
9818 emit_move_insn (operands[0], copy_rtx (out));
9820 return 1; /* DONE */
9824 * General case: Jumpful:
9825 * xorl dest,dest cmpl op1, op2
9826 * cmpl op1, op2 movl ct, dest
9828 * decl dest movl cf, dest
9829 * andl (cf-ct),dest 1:
9834 * This is reasonably steep, but branch mispredict costs are
9835 * high on modern cpus, so consider failing only if optimizing
9839 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9840 && BRANCH_COST >= 2)
9846 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9847 /* We may be reversing unordered compare to normal compare,
9848 that is not valid in general (we may convert non-trapping
9849 condition to trapping one), however on i386 we currently
9850 emit all comparisons unordered. */
9851 code = reverse_condition_maybe_unordered (code);
9854 code = reverse_condition (code);
9855 if (compare_code != NIL)
9856 compare_code = reverse_condition (compare_code);
9860 if (compare_code != NIL)
9862 /* notl op1 (if needed)
9867 For x < 0 (resp. x <= -1) there will be no notl,
9868 so if possible swap the constants to get rid of the
9870 True/false will be -1/0 while code below (store flag
9871 followed by decrement) is 0/-1, so the constants need
9872 to be exchanged once more. */
9874 if (compare_code == GE || !cf)
9876 code = reverse_condition (code);
9881 HOST_WIDE_INT tmp = cf;
9886 out = emit_store_flag (out, code, ix86_compare_op0,
9887 ix86_compare_op1, VOIDmode, 0, -1);
9891 out = emit_store_flag (out, code, ix86_compare_op0,
9892 ix86_compare_op1, VOIDmode, 0, 1);
9894 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9895 copy_rtx (out), 1, OPTAB_DIRECT);
9898 out = expand_simple_binop (mode, AND, copy_rtx (out),
9899 gen_int_mode (cf - ct, mode),
9900 copy_rtx (out), 1, OPTAB_DIRECT);
9902 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9903 copy_rtx (out), 1, OPTAB_DIRECT);
9904 if (!rtx_equal_p (out, operands[0]))
9905 emit_move_insn (operands[0], copy_rtx (out));
9907 return 1; /* DONE */
9911 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9913 /* Try a few things more with specific constants and a variable. */
9916 rtx var, orig_out, out, tmp;
9918 if (BRANCH_COST <= 2)
9919 return 0; /* FAIL */
9921 /* If one of the two operands is an interesting constant, load a
9922 constant with the above and mask it in with a logical operation. */
9924 if (GET_CODE (operands[2]) == CONST_INT)
9927 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9928 operands[3] = constm1_rtx, op = and_optab;
9929 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9930 operands[3] = const0_rtx, op = ior_optab;
9932 return 0; /* FAIL */
9934 else if (GET_CODE (operands[3]) == CONST_INT)
9937 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9938 operands[2] = constm1_rtx, op = and_optab;
9939 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9940 operands[2] = const0_rtx, op = ior_optab;
9942 return 0; /* FAIL */
9945 return 0; /* FAIL */
9947 orig_out = operands[0];
9948 tmp = gen_reg_rtx (mode);
9951 /* Recurse to get the constant loaded. */
9952 if (ix86_expand_int_movcc (operands) == 0)
9953 return 0; /* FAIL */
9955 /* Mask in the interesting variable. */
9956 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9958 if (!rtx_equal_p (out, orig_out))
9959 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9961 return 1; /* DONE */
9965 * For comparison with above,
9975 if (! nonimmediate_operand (operands[2], mode))
9976 operands[2] = force_reg (mode, operands[2]);
9977 if (! nonimmediate_operand (operands[3], mode))
9978 operands[3] = force_reg (mode, operands[3]);
9980 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9982 rtx tmp = gen_reg_rtx (mode);
9983 emit_move_insn (tmp, operands[3]);
9986 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9988 rtx tmp = gen_reg_rtx (mode);
9989 emit_move_insn (tmp, operands[2]);
9993 if (! register_operand (operands[2], VOIDmode)
9995 || ! register_operand (operands[3], VOIDmode)))
9996 operands[2] = force_reg (mode, operands[2]);
9999 && ! register_operand (operands[3], VOIDmode))
10000 operands[3] = force_reg (mode, operands[3]);
10002 emit_insn (compare_seq);
10003 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10004 gen_rtx_IF_THEN_ELSE (mode,
10005 compare_op, operands[2],
10008 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10009 gen_rtx_IF_THEN_ELSE (mode,
10011 copy_rtx (operands[3]),
10012 copy_rtx (operands[0]))));
10014 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10015 gen_rtx_IF_THEN_ELSE (mode,
10017 copy_rtx (operands[2]),
10018 copy_rtx (operands[0]))));
10020 return 1; /* DONE */
10024 ix86_expand_fp_movcc (rtx operands[])
10026 enum rtx_code code;
10028 rtx compare_op, second_test, bypass_test;
10030 /* For SF/DFmode conditional moves based on comparisons
10031 in same mode, we may want to use SSE min/max instructions. */
10032 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10033 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10034 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10035 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10036 && (!TARGET_IEEE_FP
10037 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10038 /* We may be called from the post-reload splitter. */
10039 && (!REG_P (operands[0])
10040 || SSE_REG_P (operands[0])
10041 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10043 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10044 code = GET_CODE (operands[1]);
10046 /* See if we have (cross) match between comparison operands and
10047 conditional move operands. */
10048 if (rtx_equal_p (operands[2], op1))
10053 code = reverse_condition_maybe_unordered (code);
10055 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10057 /* Check for min operation. */
10058 if (code == LT || code == UNLE)
10066 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10067 if (memory_operand (op0, VOIDmode))
10068 op0 = force_reg (GET_MODE (operands[0]), op0);
10069 if (GET_MODE (operands[0]) == SFmode)
10070 emit_insn (gen_minsf3 (operands[0], op0, op1));
10072 emit_insn (gen_mindf3 (operands[0], op0, op1));
10075 /* Check for max operation. */
10076 if (code == GT || code == UNGE)
10084 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10085 if (memory_operand (op0, VOIDmode))
10086 op0 = force_reg (GET_MODE (operands[0]), op0);
10087 if (GET_MODE (operands[0]) == SFmode)
10088 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10090 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10094 /* Manage condition to be sse_comparison_operator. In case we are
10095 in non-ieee mode, try to canonicalize the destination operand
10096 to be first in the comparison - this helps reload to avoid extra
10098 if (!sse_comparison_operator (operands[1], VOIDmode)
10099 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10101 rtx tmp = ix86_compare_op0;
10102 ix86_compare_op0 = ix86_compare_op1;
10103 ix86_compare_op1 = tmp;
10104 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10105 VOIDmode, ix86_compare_op0,
10108 /* Similarly try to manage result to be first operand of conditional
10109 move. We also don't support the NE comparison on SSE, so try to
10111 if ((rtx_equal_p (operands[0], operands[3])
10112 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10113 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10115 rtx tmp = operands[2];
10116 operands[2] = operands[3];
10118 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10119 (GET_CODE (operands[1])),
10120 VOIDmode, ix86_compare_op0,
10123 if (GET_MODE (operands[0]) == SFmode)
10124 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10125 operands[2], operands[3],
10126 ix86_compare_op0, ix86_compare_op1));
10128 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10129 operands[2], operands[3],
10130 ix86_compare_op0, ix86_compare_op1));
10134 /* The floating point conditional move instructions don't directly
10135 support conditions resulting from a signed integer comparison. */
10137 code = GET_CODE (operands[1]);
10138 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10140 /* The floating point conditional move instructions don't directly
10141 support signed integer comparisons. */
10143 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10145 if (second_test != NULL || bypass_test != NULL)
10147 tmp = gen_reg_rtx (QImode);
10148 ix86_expand_setcc (code, tmp);
10150 ix86_compare_op0 = tmp;
10151 ix86_compare_op1 = const0_rtx;
10152 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10154 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10156 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10157 emit_move_insn (tmp, operands[3]);
10160 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10162 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10163 emit_move_insn (tmp, operands[2]);
10167 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10168 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10173 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10174 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10179 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10180 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10188 /* Expand conditional increment or decrement using adb/sbb instructions.
10189 The default case using setcc followed by the conditional move can be
10190 done by generic code. */
10192 ix86_expand_int_addcc (rtx operands[])
10194 enum rtx_code code = GET_CODE (operands[1]);
10196 rtx val = const0_rtx;
10197 bool fpcmp = false;
10198 enum machine_mode mode = GET_MODE (operands[0]);
10200 if (operands[3] != const1_rtx
10201 && operands[3] != constm1_rtx)
10203 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10204 ix86_compare_op1, &compare_op))
10206 code = GET_CODE (compare_op);
10208 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10209 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10212 code = ix86_fp_compare_code_to_integer (code);
10219 PUT_CODE (compare_op,
10220 reverse_condition_maybe_unordered
10221 (GET_CODE (compare_op)));
10223 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10225 PUT_MODE (compare_op, mode);
10227 /* Construct either adc or sbb insn. */
10228 if ((code == LTU) == (operands[3] == constm1_rtx))
10230 switch (GET_MODE (operands[0]))
10233 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10236 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10239 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10242 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10250 switch (GET_MODE (operands[0]))
10253 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10256 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10259 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10262 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10268 return 1; /* DONE */
10272 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10273 works for floating pointer parameters and nonoffsetable memories.
10274 For pushes, it returns just stack offsets; the values will be saved
10275 in the right order. Maximally three parts are generated. */
10278 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10283 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10285 size = (GET_MODE_SIZE (mode) + 4) / 8;
10287 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10289 if (size < 2 || size > 3)
10292 /* Optimize constant pool reference to immediates. This is used by fp
10293 moves, that force all constants to memory to allow combining. */
10294 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10296 rtx tmp = maybe_get_pool_constant (operand);
10301 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10303 /* The only non-offsetable memories we handle are pushes. */
10304 if (! push_operand (operand, VOIDmode))
10307 operand = copy_rtx (operand);
10308 PUT_MODE (operand, Pmode);
10309 parts[0] = parts[1] = parts[2] = operand;
10311 else if (!TARGET_64BIT)
10313 if (mode == DImode)
10314 split_di (&operand, 1, &parts[0], &parts[1]);
10317 if (REG_P (operand))
10319 if (!reload_completed)
10321 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10322 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10324 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10326 else if (offsettable_memref_p (operand))
10328 operand = adjust_address (operand, SImode, 0);
10329 parts[0] = operand;
10330 parts[1] = adjust_address (operand, SImode, 4);
10332 parts[2] = adjust_address (operand, SImode, 8);
10334 else if (GET_CODE (operand) == CONST_DOUBLE)
10339 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10344 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10345 parts[2] = gen_int_mode (l[2], SImode);
10348 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10353 parts[1] = gen_int_mode (l[1], SImode);
10354 parts[0] = gen_int_mode (l[0], SImode);
10362 if (mode == TImode)
10363 split_ti (&operand, 1, &parts[0], &parts[1]);
10364 if (mode == XFmode || mode == TFmode)
10366 if (REG_P (operand))
10368 if (!reload_completed)
10370 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10371 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10373 else if (offsettable_memref_p (operand))
10375 operand = adjust_address (operand, DImode, 0);
10376 parts[0] = operand;
10377 parts[1] = adjust_address (operand, SImode, 8);
10379 else if (GET_CODE (operand) == CONST_DOUBLE)
10384 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10385 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10386 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10387 if (HOST_BITS_PER_WIDE_INT >= 64)
10390 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10391 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10394 parts[0] = immed_double_const (l[0], l[1], DImode);
10395 parts[1] = gen_int_mode (l[2], SImode);
10405 /* Emit insns to perform a move or push of DI, DF, and XF values.
10406 Return false when normal moves are needed; true when all required
10407 insns have been emitted. Operands 2-4 contain the input values
10408 int the correct order; operands 5-7 contain the output values. */
10411 ix86_split_long_move (rtx operands[])
10416 int collisions = 0;
10417 enum machine_mode mode = GET_MODE (operands[0]);
10419 /* The DFmode expanders may ask us to move double.
10420 For 64bit target this is single move. By hiding the fact
10421 here we simplify i386.md splitters. */
10422 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10424 /* Optimize constant pool reference to immediates. This is used by
10425 fp moves, that force all constants to memory to allow combining. */
10427 if (GET_CODE (operands[1]) == MEM
10428 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10429 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10430 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10431 if (push_operand (operands[0], VOIDmode))
10433 operands[0] = copy_rtx (operands[0]);
10434 PUT_MODE (operands[0], Pmode);
10437 operands[0] = gen_lowpart (DImode, operands[0]);
10438 operands[1] = gen_lowpart (DImode, operands[1]);
10439 emit_move_insn (operands[0], operands[1]);
10443 /* The only non-offsettable memory we handle is push. */
10444 if (push_operand (operands[0], VOIDmode))
10446 else if (GET_CODE (operands[0]) == MEM
10447 && ! offsettable_memref_p (operands[0]))
10450 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10451 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10453 /* When emitting push, take care for source operands on the stack. */
10454 if (push && GET_CODE (operands[1]) == MEM
10455 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10458 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10459 XEXP (part[1][2], 0));
10460 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10461 XEXP (part[1][1], 0));
10464 /* We need to do copy in the right order in case an address register
10465 of the source overlaps the destination. */
10466 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10468 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10470 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10473 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10476 /* Collision in the middle part can be handled by reordering. */
10477 if (collisions == 1 && nparts == 3
10478 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10481 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10482 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10485 /* If there are more collisions, we can't handle it by reordering.
10486 Do an lea to the last part and use only one colliding move. */
10487 else if (collisions > 1)
10493 base = part[0][nparts - 1];
10495 /* Handle the case when the last part isn't valid for lea.
10496 Happens in 64-bit mode storing the 12-byte XFmode. */
10497 if (GET_MODE (base) != Pmode)
10498 base = gen_rtx_REG (Pmode, REGNO (base));
10500 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10501 part[1][0] = replace_equiv_address (part[1][0], base);
10502 part[1][1] = replace_equiv_address (part[1][1],
10503 plus_constant (base, UNITS_PER_WORD));
10505 part[1][2] = replace_equiv_address (part[1][2],
10506 plus_constant (base, 8));
10516 /* We use only first 12 bytes of TFmode value, but for pushing we
10517 are required to adjust stack as if we were pushing real 16byte
10519 if (mode == TFmode && !TARGET_64BIT)
10520 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10522 emit_move_insn (part[0][2], part[1][2]);
10527 /* In 64bit mode we don't have 32bit push available. In case this is
10528 register, it is OK - we will just use larger counterpart. We also
10529 retype memory - these comes from attempt to avoid REX prefix on
10530 moving of second half of TFmode value. */
10531 if (GET_MODE (part[1][1]) == SImode)
10533 if (GET_CODE (part[1][1]) == MEM)
10534 part[1][1] = adjust_address (part[1][1], DImode, 0);
10535 else if (REG_P (part[1][1]))
10536 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10539 if (GET_MODE (part[1][0]) == SImode)
10540 part[1][0] = part[1][1];
10543 emit_move_insn (part[0][1], part[1][1]);
10544 emit_move_insn (part[0][0], part[1][0]);
10548 /* Choose correct order to not overwrite the source before it is copied. */
10549 if ((REG_P (part[0][0])
10550 && REG_P (part[1][1])
10551 && (REGNO (part[0][0]) == REGNO (part[1][1])
10553 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10555 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10559 operands[2] = part[0][2];
10560 operands[3] = part[0][1];
10561 operands[4] = part[0][0];
10562 operands[5] = part[1][2];
10563 operands[6] = part[1][1];
10564 operands[7] = part[1][0];
10568 operands[2] = part[0][1];
10569 operands[3] = part[0][0];
10570 operands[5] = part[1][1];
10571 operands[6] = part[1][0];
10578 operands[2] = part[0][0];
10579 operands[3] = part[0][1];
10580 operands[4] = part[0][2];
10581 operands[5] = part[1][0];
10582 operands[6] = part[1][1];
10583 operands[7] = part[1][2];
10587 operands[2] = part[0][0];
10588 operands[3] = part[0][1];
10589 operands[5] = part[1][0];
10590 operands[6] = part[1][1];
10593 emit_move_insn (operands[2], operands[5]);
10594 emit_move_insn (operands[3], operands[6]);
10596 emit_move_insn (operands[4], operands[7]);
10602 ix86_split_ashldi (rtx *operands, rtx scratch)
10604 rtx low[2], high[2];
10607 if (GET_CODE (operands[2]) == CONST_INT)
10609 split_di (operands, 2, low, high);
10610 count = INTVAL (operands[2]) & 63;
10614 emit_move_insn (high[0], low[1]);
10615 emit_move_insn (low[0], const0_rtx);
10618 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10622 if (!rtx_equal_p (operands[0], operands[1]))
10623 emit_move_insn (operands[0], operands[1]);
10624 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10625 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10630 if (!rtx_equal_p (operands[0], operands[1]))
10631 emit_move_insn (operands[0], operands[1]);
10633 split_di (operands, 1, low, high);
10635 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10636 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10638 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10640 if (! no_new_pseudos)
10641 scratch = force_reg (SImode, const0_rtx);
10643 emit_move_insn (scratch, const0_rtx);
10645 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10649 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10654 ix86_split_ashrdi (rtx *operands, rtx scratch)
10656 rtx low[2], high[2];
10659 if (GET_CODE (operands[2]) == CONST_INT)
10661 split_di (operands, 2, low, high);
10662 count = INTVAL (operands[2]) & 63;
10666 emit_move_insn (low[0], high[1]);
10668 if (! reload_completed)
10669 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10672 emit_move_insn (high[0], low[0]);
10673 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10677 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10681 if (!rtx_equal_p (operands[0], operands[1]))
10682 emit_move_insn (operands[0], operands[1]);
10683 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10684 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10689 if (!rtx_equal_p (operands[0], operands[1]))
10690 emit_move_insn (operands[0], operands[1]);
10692 split_di (operands, 1, low, high);
10694 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10695 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10697 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10699 if (! no_new_pseudos)
10700 scratch = gen_reg_rtx (SImode);
10701 emit_move_insn (scratch, high[0]);
10702 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10703 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10707 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10712 ix86_split_lshrdi (rtx *operands, rtx scratch)
10714 rtx low[2], high[2];
10717 if (GET_CODE (operands[2]) == CONST_INT)
10719 split_di (operands, 2, low, high);
10720 count = INTVAL (operands[2]) & 63;
10724 emit_move_insn (low[0], high[1]);
10725 emit_move_insn (high[0], const0_rtx);
10728 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10732 if (!rtx_equal_p (operands[0], operands[1]))
10733 emit_move_insn (operands[0], operands[1]);
10734 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10735 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10740 if (!rtx_equal_p (operands[0], operands[1]))
10741 emit_move_insn (operands[0], operands[1]);
10743 split_di (operands, 1, low, high);
10745 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10746 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10748 /* Heh. By reversing the arguments, we can reuse this pattern. */
10749 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10751 if (! no_new_pseudos)
10752 scratch = force_reg (SImode, const0_rtx);
10754 emit_move_insn (scratch, const0_rtx);
10756 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10760 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10764 /* Helper function for the string operations below. Dest VARIABLE whether
10765 it is aligned to VALUE bytes. If true, jump to the label. */
10767 ix86_expand_aligntest (rtx variable, int value)
10769 rtx label = gen_label_rtx ();
10770 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10771 if (GET_MODE (variable) == DImode)
10772 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10774 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10775 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10780 /* Adjust COUNTER by the VALUE. */
10782 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10784 if (GET_MODE (countreg) == DImode)
10785 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10787 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10790 /* Zero extend possibly SImode EXP to Pmode register. */
10792 ix86_zero_extend_to_Pmode (rtx exp)
10795 if (GET_MODE (exp) == VOIDmode)
10796 return force_reg (Pmode, exp);
10797 if (GET_MODE (exp) == Pmode)
10798 return copy_to_mode_reg (Pmode, exp);
10799 r = gen_reg_rtx (Pmode);
10800 emit_insn (gen_zero_extendsidi2 (r, exp));
10804 /* Expand string move (memcpy) operation. Use i386 string operations when
10805 profitable. expand_clrstr contains similar code. */
10807 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10809 rtx srcreg, destreg, countreg;
10810 enum machine_mode counter_mode;
10811 HOST_WIDE_INT align = 0;
10812 unsigned HOST_WIDE_INT count = 0;
10815 if (GET_CODE (align_exp) == CONST_INT)
10816 align = INTVAL (align_exp);
10818 /* Can't use any of this if the user has appropriated esi or edi. */
10819 if (global_regs[4] || global_regs[5])
10822 /* This simple hack avoids all inlining code and simplifies code below. */
10823 if (!TARGET_ALIGN_STRINGOPS)
10826 if (GET_CODE (count_exp) == CONST_INT)
10828 count = INTVAL (count_exp);
10829 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10833 /* Figure out proper mode for counter. For 32bits it is always SImode,
10834 for 64bits use SImode when possible, otherwise DImode.
10835 Set count to number of bytes copied when known at compile time. */
10836 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10837 || x86_64_zero_extended_value (count_exp))
10838 counter_mode = SImode;
10840 counter_mode = DImode;
10844 if (counter_mode != SImode && counter_mode != DImode)
10847 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10848 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10850 emit_insn (gen_cld ());
10852 /* When optimizing for size emit simple rep ; movsb instruction for
10853 counts not divisible by 4. */
10855 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10857 countreg = ix86_zero_extend_to_Pmode (count_exp);
10859 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10860 destreg, srcreg, countreg));
10862 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10863 destreg, srcreg, countreg));
10866 /* For constant aligned (or small unaligned) copies use rep movsl
10867 followed by code copying the rest. For PentiumPro ensure 8 byte
10868 alignment to allow rep movsl acceleration. */
10870 else if (count != 0
10872 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10873 || optimize_size || count < (unsigned int) 64))
10875 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10876 if (count & ~(size - 1))
10878 countreg = copy_to_mode_reg (counter_mode,
10879 GEN_INT ((count >> (size == 4 ? 2 : 3))
10880 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10881 countreg = ix86_zero_extend_to_Pmode (countreg);
10885 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10886 destreg, srcreg, countreg));
10888 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10889 destreg, srcreg, countreg));
10892 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10893 destreg, srcreg, countreg));
10895 if (size == 8 && (count & 0x04))
10896 emit_insn (gen_strmovsi (destreg, srcreg));
10898 emit_insn (gen_strmovhi (destreg, srcreg));
10900 emit_insn (gen_strmovqi (destreg, srcreg));
10902 /* The generic code based on the glibc implementation:
10903 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10904 allowing accelerated copying there)
10905 - copy the data using rep movsl
10906 - copy the rest. */
10911 int desired_alignment = (TARGET_PENTIUMPRO
10912 && (count == 0 || count >= (unsigned int) 260)
10913 ? 8 : UNITS_PER_WORD);
10915 /* In case we don't know anything about the alignment, default to
10916 library version, since it is usually equally fast and result in
10919 Also emit call when we know that the count is large and call overhead
10920 will not be important. */
10921 if (!TARGET_INLINE_ALL_STRINGOPS
10922 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10928 if (TARGET_SINGLE_STRINGOP)
10929 emit_insn (gen_cld ());
10931 countreg2 = gen_reg_rtx (Pmode);
10932 countreg = copy_to_mode_reg (counter_mode, count_exp);
10934 /* We don't use loops to align destination and to copy parts smaller
10935 than 4 bytes, because gcc is able to optimize such code better (in
10936 the case the destination or the count really is aligned, gcc is often
10937 able to predict the branches) and also it is friendlier to the
10938 hardware branch prediction.
10940 Using loops is beneficial for generic case, because we can
10941 handle small counts using the loops. Many CPUs (such as Athlon)
10942 have large REP prefix setup costs.
10944 This is quite costly. Maybe we can revisit this decision later or
10945 add some customizability to this code. */
10947 if (count == 0 && align < desired_alignment)
10949 label = gen_label_rtx ();
10950 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10951 LEU, 0, counter_mode, 1, label);
10955 rtx label = ix86_expand_aligntest (destreg, 1);
10956 emit_insn (gen_strmovqi (destreg, srcreg));
10957 ix86_adjust_counter (countreg, 1);
10958 emit_label (label);
10959 LABEL_NUSES (label) = 1;
10963 rtx label = ix86_expand_aligntest (destreg, 2);
10964 emit_insn (gen_strmovhi (destreg, srcreg));
10965 ix86_adjust_counter (countreg, 2);
10966 emit_label (label);
10967 LABEL_NUSES (label) = 1;
10969 if (align <= 4 && desired_alignment > 4)
10971 rtx label = ix86_expand_aligntest (destreg, 4);
10972 emit_insn (gen_strmovsi (destreg, srcreg));
10973 ix86_adjust_counter (countreg, 4);
10974 emit_label (label);
10975 LABEL_NUSES (label) = 1;
10978 if (label && desired_alignment > 4 && !TARGET_64BIT)
10980 emit_label (label);
10981 LABEL_NUSES (label) = 1;
10984 if (!TARGET_SINGLE_STRINGOP)
10985 emit_insn (gen_cld ());
10988 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10990 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10991 destreg, srcreg, countreg2));
10995 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10996 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10997 destreg, srcreg, countreg2));
11002 emit_label (label);
11003 LABEL_NUSES (label) = 1;
11005 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11006 emit_insn (gen_strmovsi (destreg, srcreg));
11007 if ((align <= 4 || count == 0) && TARGET_64BIT)
11009 rtx label = ix86_expand_aligntest (countreg, 4);
11010 emit_insn (gen_strmovsi (destreg, srcreg));
11011 emit_label (label);
11012 LABEL_NUSES (label) = 1;
11014 if (align > 2 && count != 0 && (count & 2))
11015 emit_insn (gen_strmovhi (destreg, srcreg));
11016 if (align <= 2 || count == 0)
11018 rtx label = ix86_expand_aligntest (countreg, 2);
11019 emit_insn (gen_strmovhi (destreg, srcreg));
11020 emit_label (label);
11021 LABEL_NUSES (label) = 1;
11023 if (align > 1 && count != 0 && (count & 1))
11024 emit_insn (gen_strmovqi (destreg, srcreg));
11025 if (align <= 1 || count == 0)
11027 rtx label = ix86_expand_aligntest (countreg, 1);
11028 emit_insn (gen_strmovqi (destreg, srcreg));
11029 emit_label (label);
11030 LABEL_NUSES (label) = 1;
11034 insns = get_insns ();
11037 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11042 /* Expand string clear operation (bzero). Use i386 string operations when
11043 profitable. expand_movstr contains similar code. */
11045 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
11047 rtx destreg, zeroreg, countreg;
11048 enum machine_mode counter_mode;
11049 HOST_WIDE_INT align = 0;
11050 unsigned HOST_WIDE_INT count = 0;
11052 if (GET_CODE (align_exp) == CONST_INT)
11053 align = INTVAL (align_exp);
11055 /* Can't use any of this if the user has appropriated esi. */
11056 if (global_regs[4])
11059 /* This simple hack avoids all inlining code and simplifies code below. */
11060 if (!TARGET_ALIGN_STRINGOPS)
11063 if (GET_CODE (count_exp) == CONST_INT)
11065 count = INTVAL (count_exp);
11066 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11069 /* Figure out proper mode for counter. For 32bits it is always SImode,
11070 for 64bits use SImode when possible, otherwise DImode.
11071 Set count to number of bytes copied when known at compile time. */
11072 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11073 || x86_64_zero_extended_value (count_exp))
11074 counter_mode = SImode;
11076 counter_mode = DImode;
11078 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11080 emit_insn (gen_cld ());
11082 /* When optimizing for size emit simple rep ; movsb instruction for
11083 counts not divisible by 4. */
11085 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11087 countreg = ix86_zero_extend_to_Pmode (count_exp);
11088 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11090 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11091 destreg, countreg));
11093 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11094 destreg, countreg));
11096 else if (count != 0
11098 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11099 || optimize_size || count < (unsigned int) 64))
11101 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11102 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11103 if (count & ~(size - 1))
11105 countreg = copy_to_mode_reg (counter_mode,
11106 GEN_INT ((count >> (size == 4 ? 2 : 3))
11107 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11108 countreg = ix86_zero_extend_to_Pmode (countreg);
11112 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11113 destreg, countreg));
11115 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11116 destreg, countreg));
11119 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11120 destreg, countreg));
11122 if (size == 8 && (count & 0x04))
11123 emit_insn (gen_strsetsi (destreg,
11124 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11126 emit_insn (gen_strsethi (destreg,
11127 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11129 emit_insn (gen_strsetqi (destreg,
11130 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11136 /* Compute desired alignment of the string operation. */
11137 int desired_alignment = (TARGET_PENTIUMPRO
11138 && (count == 0 || count >= (unsigned int) 260)
11139 ? 8 : UNITS_PER_WORD);
11141 /* In case we don't know anything about the alignment, default to
11142 library version, since it is usually equally fast and result in
11145 Also emit call when we know that the count is large and call overhead
11146 will not be important. */
11147 if (!TARGET_INLINE_ALL_STRINGOPS
11148 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11151 if (TARGET_SINGLE_STRINGOP)
11152 emit_insn (gen_cld ());
11154 countreg2 = gen_reg_rtx (Pmode);
11155 countreg = copy_to_mode_reg (counter_mode, count_exp);
11156 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11158 if (count == 0 && align < desired_alignment)
11160 label = gen_label_rtx ();
11161 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11162 LEU, 0, counter_mode, 1, label);
11166 rtx label = ix86_expand_aligntest (destreg, 1);
11167 emit_insn (gen_strsetqi (destreg,
11168 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11169 ix86_adjust_counter (countreg, 1);
11170 emit_label (label);
11171 LABEL_NUSES (label) = 1;
11175 rtx label = ix86_expand_aligntest (destreg, 2);
11176 emit_insn (gen_strsethi (destreg,
11177 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11178 ix86_adjust_counter (countreg, 2);
11179 emit_label (label);
11180 LABEL_NUSES (label) = 1;
11182 if (align <= 4 && desired_alignment > 4)
11184 rtx label = ix86_expand_aligntest (destreg, 4);
11185 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11186 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11188 ix86_adjust_counter (countreg, 4);
11189 emit_label (label);
11190 LABEL_NUSES (label) = 1;
11193 if (label && desired_alignment > 4 && !TARGET_64BIT)
11195 emit_label (label);
11196 LABEL_NUSES (label) = 1;
11200 if (!TARGET_SINGLE_STRINGOP)
11201 emit_insn (gen_cld ());
11204 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11206 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11207 destreg, countreg2));
11211 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11212 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11213 destreg, countreg2));
11217 emit_label (label);
11218 LABEL_NUSES (label) = 1;
11221 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11222 emit_insn (gen_strsetsi (destreg,
11223 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11224 if (TARGET_64BIT && (align <= 4 || count == 0))
11226 rtx label = ix86_expand_aligntest (countreg, 4);
11227 emit_insn (gen_strsetsi (destreg,
11228 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11229 emit_label (label);
11230 LABEL_NUSES (label) = 1;
11232 if (align > 2 && count != 0 && (count & 2))
11233 emit_insn (gen_strsethi (destreg,
11234 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11235 if (align <= 2 || count == 0)
11237 rtx label = ix86_expand_aligntest (countreg, 2);
11238 emit_insn (gen_strsethi (destreg,
11239 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11240 emit_label (label);
11241 LABEL_NUSES (label) = 1;
11243 if (align > 1 && count != 0 && (count & 1))
11244 emit_insn (gen_strsetqi (destreg,
11245 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11246 if (align <= 1 || count == 0)
11248 rtx label = ix86_expand_aligntest (countreg, 1);
11249 emit_insn (gen_strsetqi (destreg,
11250 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11251 emit_label (label);
11252 LABEL_NUSES (label) = 1;
11257 /* Expand strlen. */
11259 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11261 rtx addr, scratch1, scratch2, scratch3, scratch4;
11263 /* The generic case of strlen expander is long. Avoid it's
11264 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11266 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11267 && !TARGET_INLINE_ALL_STRINGOPS
11269 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11272 addr = force_reg (Pmode, XEXP (src, 0));
11273 scratch1 = gen_reg_rtx (Pmode);
11275 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11278 /* Well it seems that some optimizer does not combine a call like
11279 foo(strlen(bar), strlen(bar));
11280 when the move and the subtraction is done here. It does calculate
11281 the length just once when these instructions are done inside of
11282 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11283 often used and I use one fewer register for the lifetime of
11284 output_strlen_unroll() this is better. */
11286 emit_move_insn (out, addr);
11288 ix86_expand_strlensi_unroll_1 (out, align);
11290 /* strlensi_unroll_1 returns the address of the zero at the end of
11291 the string, like memchr(), so compute the length by subtracting
11292 the start address. */
11294 emit_insn (gen_subdi3 (out, out, addr));
11296 emit_insn (gen_subsi3 (out, out, addr));
11300 scratch2 = gen_reg_rtx (Pmode);
11301 scratch3 = gen_reg_rtx (Pmode);
11302 scratch4 = force_reg (Pmode, constm1_rtx);
11304 emit_move_insn (scratch3, addr);
11305 eoschar = force_reg (QImode, eoschar);
11307 emit_insn (gen_cld ());
11310 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11311 align, scratch4, scratch3));
11312 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11313 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11317 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11318 align, scratch4, scratch3));
11319 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11320 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11326 /* Expand the appropriate insns for doing strlen if not just doing
11329 out = result, initialized with the start address
11330 align_rtx = alignment of the address.
11331 scratch = scratch register, initialized with the startaddress when
11332 not aligned, otherwise undefined
11334 This is just the body. It needs the initializations mentioned above and
11335 some address computing at the end. These things are done in i386.md. */
11338 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11342 rtx align_2_label = NULL_RTX;
11343 rtx align_3_label = NULL_RTX;
11344 rtx align_4_label = gen_label_rtx ();
11345 rtx end_0_label = gen_label_rtx ();
11347 rtx tmpreg = gen_reg_rtx (SImode);
11348 rtx scratch = gen_reg_rtx (SImode);
11352 if (GET_CODE (align_rtx) == CONST_INT)
11353 align = INTVAL (align_rtx);
11355 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11357 /* Is there a known alignment and is it less than 4? */
11360 rtx scratch1 = gen_reg_rtx (Pmode);
11361 emit_move_insn (scratch1, out);
11362 /* Is there a known alignment and is it not 2? */
11365 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11366 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11368 /* Leave just the 3 lower bits. */
11369 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11370 NULL_RTX, 0, OPTAB_WIDEN);
11372 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11373 Pmode, 1, align_4_label);
11374 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11375 Pmode, 1, align_2_label);
11376 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11377 Pmode, 1, align_3_label);
11381 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11382 check if is aligned to 4 - byte. */
11384 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11385 NULL_RTX, 0, OPTAB_WIDEN);
11387 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11388 Pmode, 1, align_4_label);
11391 mem = gen_rtx_MEM (QImode, out);
11393 /* Now compare the bytes. */
11395 /* Compare the first n unaligned byte on a byte per byte basis. */
11396 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11397 QImode, 1, end_0_label);
11399 /* Increment the address. */
11401 emit_insn (gen_adddi3 (out, out, const1_rtx));
11403 emit_insn (gen_addsi3 (out, out, const1_rtx));
11405 /* Not needed with an alignment of 2 */
11408 emit_label (align_2_label);
11410 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11414 emit_insn (gen_adddi3 (out, out, const1_rtx));
11416 emit_insn (gen_addsi3 (out, out, const1_rtx));
11418 emit_label (align_3_label);
11421 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11425 emit_insn (gen_adddi3 (out, out, const1_rtx));
11427 emit_insn (gen_addsi3 (out, out, const1_rtx));
11430 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11431 align this loop. It gives only huge programs, but does not help to
11433 emit_label (align_4_label);
11435 mem = gen_rtx_MEM (SImode, out);
11436 emit_move_insn (scratch, mem);
11438 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11440 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11442 /* This formula yields a nonzero result iff one of the bytes is zero.
11443 This saves three branches inside loop and many cycles. */
11445 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11446 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11447 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11448 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11449 gen_int_mode (0x80808080, SImode)));
11450 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11455 rtx reg = gen_reg_rtx (SImode);
11456 rtx reg2 = gen_reg_rtx (Pmode);
11457 emit_move_insn (reg, tmpreg);
11458 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11460 /* If zero is not in the first two bytes, move two bytes forward. */
11461 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11462 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11463 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11464 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11465 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11468 /* Emit lea manually to avoid clobbering of flags. */
11469 emit_insn (gen_rtx_SET (SImode, reg2,
11470 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11472 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11473 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11474 emit_insn (gen_rtx_SET (VOIDmode, out,
11475 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11482 rtx end_2_label = gen_label_rtx ();
11483 /* Is zero in the first two bytes? */
11485 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11486 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11487 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11488 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11489 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11491 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11492 JUMP_LABEL (tmp) = end_2_label;
11494 /* Not in the first two. Move two bytes forward. */
11495 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11497 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11499 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11501 emit_label (end_2_label);
11505 /* Avoid branch in fixing the byte. */
11506 tmpreg = gen_lowpart (QImode, tmpreg);
11507 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11508 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11510 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11512 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11514 emit_label (end_0_label);
11518 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
11519 rtx pop, int sibcall)
11521 rtx use = NULL, call;
11523 if (pop == const0_rtx)
11525 if (TARGET_64BIT && pop)
11529 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11530 fnaddr = machopic_indirect_call_target (fnaddr);
11532 /* Static functions and indirect calls don't need the pic register. */
11533 if (! TARGET_64BIT && flag_pic
11534 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11535 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11536 use_reg (&use, pic_offset_table_rtx);
11538 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11540 rtx al = gen_rtx_REG (QImode, 0);
11541 emit_move_insn (al, callarg2);
11542 use_reg (&use, al);
11544 #endif /* TARGET_MACHO */
11546 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11548 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11549 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11551 if (sibcall && TARGET_64BIT
11552 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11555 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11556 fnaddr = gen_rtx_REG (Pmode, 40);
11557 emit_move_insn (fnaddr, addr);
11558 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11561 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11563 call = gen_rtx_SET (VOIDmode, retval, call);
11566 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11567 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11568 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11571 call = emit_call_insn (call);
11573 CALL_INSN_FUNCTION_USAGE (call) = use;
11577 /* Clear stack slot assignments remembered from previous functions.
11578 This is called from INIT_EXPANDERS once before RTL is emitted for each
11581 static struct machine_function *
11582 ix86_init_machine_status (void)
11584 struct machine_function *f;
11586 f = ggc_alloc_cleared (sizeof (struct machine_function));
11587 f->use_fast_prologue_epilogue_nregs = -1;
11592 /* Return a MEM corresponding to a stack slot with mode MODE.
11593 Allocate a new slot if necessary.
11595 The RTL for a function can have several slots available: N is
11596 which slot to use. */
11599 assign_386_stack_local (enum machine_mode mode, int n)
11601 struct stack_local_entry *s;
11603 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11606 for (s = ix86_stack_locals; s; s = s->next)
11607 if (s->mode == mode && s->n == n)
11610 s = (struct stack_local_entry *)
11611 ggc_alloc (sizeof (struct stack_local_entry));
11614 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11616 s->next = ix86_stack_locals;
11617 ix86_stack_locals = s;
11621 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11623 static GTY(()) rtx ix86_tls_symbol;
11625 ix86_tls_get_addr (void)
11628 if (!ix86_tls_symbol)
11630 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11631 (TARGET_GNU_TLS && !TARGET_64BIT)
11632 ? "___tls_get_addr"
11633 : "__tls_get_addr");
11636 return ix86_tls_symbol;
11639 /* Calculate the length of the memory address in the instruction
11640 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11643 memory_address_length (rtx addr)
11645 struct ix86_address parts;
11646 rtx base, index, disp;
11649 if (GET_CODE (addr) == PRE_DEC
11650 || GET_CODE (addr) == POST_INC
11651 || GET_CODE (addr) == PRE_MODIFY
11652 || GET_CODE (addr) == POST_MODIFY)
11655 if (! ix86_decompose_address (addr, &parts))
11659 index = parts.index;
11664 - esp as the base always wants an index,
11665 - ebp as the base always wants a displacement. */
11667 /* Register Indirect. */
11668 if (base && !index && !disp)
11670 /* esp (for its index) and ebp (for its displacement) need
11671 the two-byte modrm form. */
11672 if (addr == stack_pointer_rtx
11673 || addr == arg_pointer_rtx
11674 || addr == frame_pointer_rtx
11675 || addr == hard_frame_pointer_rtx)
11679 /* Direct Addressing. */
11680 else if (disp && !base && !index)
11685 /* Find the length of the displacement constant. */
11688 if (GET_CODE (disp) == CONST_INT
11689 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11695 /* ebp always wants a displacement. */
11696 else if (base == hard_frame_pointer_rtx)
11699 /* An index requires the two-byte modrm form... */
11701 /* ...like esp, which always wants an index. */
11702 || base == stack_pointer_rtx
11703 || base == arg_pointer_rtx
11704 || base == frame_pointer_rtx)
11711 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11712 is set, expect that insn have 8bit immediate alternative. */
11714 ix86_attr_length_immediate_default (rtx insn, int shortform)
11718 extract_insn_cached (insn);
11719 for (i = recog_data.n_operands - 1; i >= 0; --i)
11720 if (CONSTANT_P (recog_data.operand[i]))
11725 && GET_CODE (recog_data.operand[i]) == CONST_INT
11726 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11730 switch (get_attr_mode (insn))
11741 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11746 fatal_insn ("unknown insn mode", insn);
11752 /* Compute default value for "length_address" attribute. */
11754 ix86_attr_length_address_default (rtx insn)
11758 if (get_attr_type (insn) == TYPE_LEA)
11760 rtx set = PATTERN (insn);
11761 if (GET_CODE (set) == SET)
11763 else if (GET_CODE (set) == PARALLEL
11764 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11765 set = XVECEXP (set, 0, 0);
11768 #ifdef ENABLE_CHECKING
11774 return memory_address_length (SET_SRC (set));
11777 extract_insn_cached (insn);
11778 for (i = recog_data.n_operands - 1; i >= 0; --i)
11779 if (GET_CODE (recog_data.operand[i]) == MEM)
11781 return memory_address_length (XEXP (recog_data.operand[i], 0));
11787 /* Return the maximum number of instructions a cpu can issue. */
11790 ix86_issue_rate (void)
11794 case PROCESSOR_PENTIUM:
11798 case PROCESSOR_PENTIUMPRO:
11799 case PROCESSOR_PENTIUM4:
11800 case PROCESSOR_ATHLON:
11809 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11810 by DEP_INSN and nothing set by DEP_INSN. */
11813 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11817 /* Simplify the test for uninteresting insns. */
11818 if (insn_type != TYPE_SETCC
11819 && insn_type != TYPE_ICMOV
11820 && insn_type != TYPE_FCMOV
11821 && insn_type != TYPE_IBR)
11824 if ((set = single_set (dep_insn)) != 0)
11826 set = SET_DEST (set);
11829 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11830 && XVECLEN (PATTERN (dep_insn), 0) == 2
11831 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11832 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11834 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11835 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11840 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11843 /* This test is true if the dependent insn reads the flags but
11844 not any other potentially set register. */
11845 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11848 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11854 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11855 address with operands set by DEP_INSN. */
11858 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11862 if (insn_type == TYPE_LEA
11865 addr = PATTERN (insn);
11866 if (GET_CODE (addr) == SET)
11868 else if (GET_CODE (addr) == PARALLEL
11869 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11870 addr = XVECEXP (addr, 0, 0);
11873 addr = SET_SRC (addr);
11878 extract_insn_cached (insn);
11879 for (i = recog_data.n_operands - 1; i >= 0; --i)
11880 if (GET_CODE (recog_data.operand[i]) == MEM)
11882 addr = XEXP (recog_data.operand[i], 0);
11889 return modified_in_p (addr, dep_insn);
11893 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11895 enum attr_type insn_type, dep_insn_type;
11896 enum attr_memory memory, dep_memory;
11898 int dep_insn_code_number;
11900 /* Anti and output dependencies have zero cost on all CPUs. */
11901 if (REG_NOTE_KIND (link) != 0)
11904 dep_insn_code_number = recog_memoized (dep_insn);
11906 /* If we can't recognize the insns, we can't really do anything. */
11907 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11910 insn_type = get_attr_type (insn);
11911 dep_insn_type = get_attr_type (dep_insn);
11915 case PROCESSOR_PENTIUM:
11916 /* Address Generation Interlock adds a cycle of latency. */
11917 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11920 /* ??? Compares pair with jump/setcc. */
11921 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11924 /* Floating point stores require value to be ready one cycle earlier. */
11925 if (insn_type == TYPE_FMOV
11926 && get_attr_memory (insn) == MEMORY_STORE
11927 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11931 case PROCESSOR_PENTIUMPRO:
11932 memory = get_attr_memory (insn);
11933 dep_memory = get_attr_memory (dep_insn);
11935 /* Since we can't represent delayed latencies of load+operation,
11936 increase the cost here for non-imov insns. */
11937 if (dep_insn_type != TYPE_IMOV
11938 && dep_insn_type != TYPE_FMOV
11939 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11942 /* INT->FP conversion is expensive. */
11943 if (get_attr_fp_int_src (dep_insn))
11946 /* There is one cycle extra latency between an FP op and a store. */
11947 if (insn_type == TYPE_FMOV
11948 && (set = single_set (dep_insn)) != NULL_RTX
11949 && (set2 = single_set (insn)) != NULL_RTX
11950 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11951 && GET_CODE (SET_DEST (set2)) == MEM)
11954 /* Show ability of reorder buffer to hide latency of load by executing
11955 in parallel with previous instruction in case
11956 previous instruction is not needed to compute the address. */
11957 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11958 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11960 /* Claim moves to take one cycle, as core can issue one load
11961 at time and the next load can start cycle later. */
11962 if (dep_insn_type == TYPE_IMOV
11963 || dep_insn_type == TYPE_FMOV)
11971 memory = get_attr_memory (insn);
11972 dep_memory = get_attr_memory (dep_insn);
11973 /* The esp dependency is resolved before the instruction is really
11975 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11976 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11979 /* Since we can't represent delayed latencies of load+operation,
11980 increase the cost here for non-imov insns. */
11981 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11982 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11984 /* INT->FP conversion is expensive. */
11985 if (get_attr_fp_int_src (dep_insn))
11988 /* Show ability of reorder buffer to hide latency of load by executing
11989 in parallel with previous instruction in case
11990 previous instruction is not needed to compute the address. */
11991 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11992 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11994 /* Claim moves to take one cycle, as core can issue one load
11995 at time and the next load can start cycle later. */
11996 if (dep_insn_type == TYPE_IMOV
11997 || dep_insn_type == TYPE_FMOV)
12006 case PROCESSOR_ATHLON:
12008 memory = get_attr_memory (insn);
12009 dep_memory = get_attr_memory (dep_insn);
12011 /* Show ability of reorder buffer to hide latency of load by executing
12012 in parallel with previous instruction in case
12013 previous instruction is not needed to compute the address. */
12014 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12015 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12017 enum attr_unit unit = get_attr_unit (insn);
12020 /* Because of the difference between the length of integer and
12021 floating unit pipeline preparation stages, the memory operands
12022 for floating point are cheaper.
12024 ??? For Athlon it the difference is most probably 2. */
12025 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12028 loadcost = TARGET_ATHLON ? 2 : 0;
12030 if (cost >= loadcost)
12045 struct ppro_sched_data
12048 int issued_this_cycle;
12052 static enum attr_ppro_uops
12053 ix86_safe_ppro_uops (rtx insn)
12055 if (recog_memoized (insn) >= 0)
12056 return get_attr_ppro_uops (insn);
12058 return PPRO_UOPS_MANY;
12062 ix86_dump_ppro_packet (FILE *dump)
12064 if (ix86_sched_data.ppro.decode[0])
12066 fprintf (dump, "PPRO packet: %d",
12067 INSN_UID (ix86_sched_data.ppro.decode[0]));
12068 if (ix86_sched_data.ppro.decode[1])
12069 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12070 if (ix86_sched_data.ppro.decode[2])
12071 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12072 fputc ('\n', dump);
12076 /* We're beginning a new block. Initialize data structures as necessary. */
12079 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12080 int sched_verbose ATTRIBUTE_UNUSED,
12081 int veclen ATTRIBUTE_UNUSED)
12083 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12086 /* Shift INSN to SLOT, and shift everything else down. */
12089 ix86_reorder_insn (rtx *insnp, rtx *slot)
12095 insnp[0] = insnp[1];
12096 while (++insnp != slot);
12102 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12105 enum attr_ppro_uops cur_uops;
12106 int issued_this_cycle;
12110 /* At this point .ppro.decode contains the state of the three
12111 decoders from last "cycle". That is, those insns that were
12112 actually independent. But here we're scheduling for the
12113 decoder, and we may find things that are decodable in the
12116 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12117 issued_this_cycle = 0;
12120 cur_uops = ix86_safe_ppro_uops (*insnp);
12122 /* If the decoders are empty, and we've a complex insn at the
12123 head of the priority queue, let it issue without complaint. */
12124 if (decode[0] == NULL)
12126 if (cur_uops == PPRO_UOPS_MANY)
12128 decode[0] = *insnp;
12132 /* Otherwise, search for a 2-4 uop unsn to issue. */
12133 while (cur_uops != PPRO_UOPS_FEW)
12135 if (insnp == ready)
12137 cur_uops = ix86_safe_ppro_uops (*--insnp);
12140 /* If so, move it to the head of the line. */
12141 if (cur_uops == PPRO_UOPS_FEW)
12142 ix86_reorder_insn (insnp, e_ready);
12144 /* Issue the head of the queue. */
12145 issued_this_cycle = 1;
12146 decode[0] = *e_ready--;
12149 /* Look for simple insns to fill in the other two slots. */
12150 for (i = 1; i < 3; ++i)
12151 if (decode[i] == NULL)
12153 if (ready > e_ready)
12157 cur_uops = ix86_safe_ppro_uops (*insnp);
12158 while (cur_uops != PPRO_UOPS_ONE)
12160 if (insnp == ready)
12162 cur_uops = ix86_safe_ppro_uops (*--insnp);
12165 /* Found one. Move it to the head of the queue and issue it. */
12166 if (cur_uops == PPRO_UOPS_ONE)
12168 ix86_reorder_insn (insnp, e_ready);
12169 decode[i] = *e_ready--;
12170 issued_this_cycle++;
12174 /* ??? Didn't find one. Ideally, here we would do a lazy split
12175 of 2-uop insns, issue one and queue the other. */
12179 if (issued_this_cycle == 0)
12180 issued_this_cycle = 1;
12181 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12184 /* We are about to being issuing insns for this clock cycle.
12185 Override the default sort algorithm to better slot instructions. */
12187 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12188 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12189 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12191 int n_ready = *n_readyp;
12192 rtx *e_ready = ready + n_ready - 1;
12194 /* Make sure to go ahead and initialize key items in
12195 ix86_sched_data if we are not going to bother trying to
12196 reorder the ready queue. */
12199 ix86_sched_data.ppro.issued_this_cycle = 1;
12208 case PROCESSOR_PENTIUMPRO:
12209 ix86_sched_reorder_ppro (ready, e_ready);
12214 return ix86_issue_rate ();
12217 /* We are about to issue INSN. Return the number of insns left on the
12218 ready queue that can be issued this cycle. */
12221 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12222 int can_issue_more)
12228 return can_issue_more - 1;
12230 case PROCESSOR_PENTIUMPRO:
12232 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12234 if (uops == PPRO_UOPS_MANY)
12237 ix86_dump_ppro_packet (dump);
12238 ix86_sched_data.ppro.decode[0] = insn;
12239 ix86_sched_data.ppro.decode[1] = NULL;
12240 ix86_sched_data.ppro.decode[2] = NULL;
12242 ix86_dump_ppro_packet (dump);
12243 ix86_sched_data.ppro.decode[0] = NULL;
12245 else if (uops == PPRO_UOPS_FEW)
12248 ix86_dump_ppro_packet (dump);
12249 ix86_sched_data.ppro.decode[0] = insn;
12250 ix86_sched_data.ppro.decode[1] = NULL;
12251 ix86_sched_data.ppro.decode[2] = NULL;
12255 for (i = 0; i < 3; ++i)
12256 if (ix86_sched_data.ppro.decode[i] == NULL)
12258 ix86_sched_data.ppro.decode[i] = insn;
12266 ix86_dump_ppro_packet (dump);
12267 ix86_sched_data.ppro.decode[0] = NULL;
12268 ix86_sched_data.ppro.decode[1] = NULL;
12269 ix86_sched_data.ppro.decode[2] = NULL;
12273 return --ix86_sched_data.ppro.issued_this_cycle;
12278 ia32_use_dfa_pipeline_interface (void)
12280 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12285 /* How many alternative schedules to try. This should be as wide as the
12286 scheduling freedom in the DFA, but no wider. Making this value too
12287 large results extra work for the scheduler. */
12290 ia32_multipass_dfa_lookahead (void)
12292 if (ix86_tune == PROCESSOR_PENTIUM)
12299 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12300 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12304 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12309 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12311 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12315 /* Subroutine of above to actually do the updating by recursively walking
12319 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12322 enum rtx_code code = GET_CODE (x);
12323 const char *format_ptr = GET_RTX_FORMAT (code);
12326 if (code == MEM && XEXP (x, 0) == dstreg)
12327 MEM_COPY_ATTRIBUTES (x, dstref);
12328 else if (code == MEM && XEXP (x, 0) == srcreg)
12329 MEM_COPY_ATTRIBUTES (x, srcref);
12331 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12333 if (*format_ptr == 'e')
12334 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12336 else if (*format_ptr == 'E')
12337 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12338 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12343 /* Compute the alignment given to a constant that is being placed in memory.
12344 EXP is the constant and ALIGN is the alignment that the object would
12346 The value of this function is used instead of that alignment to align
12350 ix86_constant_alignment (tree exp, int align)
12352 if (TREE_CODE (exp) == REAL_CST)
12354 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12356 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12359 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12366 /* Compute the alignment for a static variable.
12367 TYPE is the data type, and ALIGN is the alignment that
12368 the object would ordinarily have. The value of this function is used
12369 instead of that alignment to align the object. */
12372 ix86_data_alignment (tree type, int align)
12374 if (AGGREGATE_TYPE_P (type)
12375 && TYPE_SIZE (type)
12376 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12377 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12378 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12381 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12382 to 16byte boundary. */
12385 if (AGGREGATE_TYPE_P (type)
12386 && TYPE_SIZE (type)
12387 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12388 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12389 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12393 if (TREE_CODE (type) == ARRAY_TYPE)
12395 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12397 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12400 else if (TREE_CODE (type) == COMPLEX_TYPE)
12403 if (TYPE_MODE (type) == DCmode && align < 64)
12405 if (TYPE_MODE (type) == XCmode && align < 128)
12408 else if ((TREE_CODE (type) == RECORD_TYPE
12409 || TREE_CODE (type) == UNION_TYPE
12410 || TREE_CODE (type) == QUAL_UNION_TYPE)
12411 && TYPE_FIELDS (type))
12413 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12415 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12418 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12419 || TREE_CODE (type) == INTEGER_TYPE)
12421 if (TYPE_MODE (type) == DFmode && align < 64)
12423 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12430 /* Compute the alignment for a local variable.
12431 TYPE is the data type, and ALIGN is the alignment that
12432 the object would ordinarily have. The value of this macro is used
12433 instead of that alignment to align the object. */
12436 ix86_local_alignment (tree type, int align)
12438 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12439 to 16byte boundary. */
12442 if (AGGREGATE_TYPE_P (type)
12443 && TYPE_SIZE (type)
12444 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12445 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12446 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12449 if (TREE_CODE (type) == ARRAY_TYPE)
12451 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12453 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12456 else if (TREE_CODE (type) == COMPLEX_TYPE)
12458 if (TYPE_MODE (type) == DCmode && align < 64)
12460 if (TYPE_MODE (type) == XCmode && align < 128)
12463 else if ((TREE_CODE (type) == RECORD_TYPE
12464 || TREE_CODE (type) == UNION_TYPE
12465 || TREE_CODE (type) == QUAL_UNION_TYPE)
12466 && TYPE_FIELDS (type))
12468 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12470 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12473 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12474 || TREE_CODE (type) == INTEGER_TYPE)
12477 if (TYPE_MODE (type) == DFmode && align < 64)
12479 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12485 /* Emit RTL insns to initialize the variable parts of a trampoline.
12486 FNADDR is an RTX for the address of the function's pure code.
12487 CXT is an RTX for the static chain value for the function. */
12489 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12493 /* Compute offset from the end of the jmp to the target function. */
12494 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12495 plus_constant (tramp, 10),
12496 NULL_RTX, 1, OPTAB_DIRECT);
12497 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12498 gen_int_mode (0xb9, QImode));
12499 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12500 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12501 gen_int_mode (0xe9, QImode));
12502 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12507 /* Try to load address using shorter movl instead of movabs.
12508 We may want to support movq for kernel mode, but kernel does not use
12509 trampolines at the moment. */
12510 if (x86_64_zero_extended_value (fnaddr))
12512 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12513 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12514 gen_int_mode (0xbb41, HImode));
12515 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12516 gen_lowpart (SImode, fnaddr));
12521 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12522 gen_int_mode (0xbb49, HImode));
12523 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12527 /* Load static chain using movabs to r10. */
12528 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12529 gen_int_mode (0xba49, HImode));
12530 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12533 /* Jump to the r11 */
12534 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12535 gen_int_mode (0xff49, HImode));
12536 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12537 gen_int_mode (0xe3, QImode));
12539 if (offset > TRAMPOLINE_SIZE)
12543 #ifdef TRANSFER_FROM_TRAMPOLINE
12544 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12545 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12549 #define def_builtin(MASK, NAME, TYPE, CODE) \
12551 if ((MASK) & target_flags \
12552 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12553 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12554 NULL, NULL_TREE); \
12557 struct builtin_description
12559 const unsigned int mask;
12560 const enum insn_code icode;
12561 const char *const name;
12562 const enum ix86_builtins code;
12563 const enum rtx_code comparison;
12564 const unsigned int flag;
12567 static const struct builtin_description bdesc_comi[] =
12569 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12570 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12571 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12572 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12573 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12574 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12575 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12576 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12577 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12578 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12579 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12580 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12581 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12582 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12583 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12584 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12585 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12586 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12587 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12588 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12589 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12590 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12591 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12592 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12595 static const struct builtin_description bdesc_2arg[] =
12598 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12599 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12600 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12601 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12602 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12603 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12604 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12605 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12607 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12608 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12609 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12610 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12611 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12612 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12613 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12614 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12615 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12616 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12617 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12618 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12619 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12620 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12621 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12622 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12623 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12624 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12625 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12626 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12628 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12629 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12630 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12631 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12633 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12634 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12635 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12636 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12638 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12639 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12640 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12641 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12642 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12645 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12646 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12647 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12648 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12649 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12650 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12651 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12652 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12654 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12655 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12656 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12657 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12658 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12659 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12660 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12661 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12663 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12664 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12665 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12667 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12668 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12669 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12670 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12672 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12673 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12675 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12676 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12677 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12678 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12679 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12680 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12682 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12683 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12684 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12685 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12687 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12688 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12689 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12690 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12691 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12692 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12695 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12696 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12697 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12699 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12700 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12701 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12703 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12704 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12705 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12706 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12707 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12708 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12710 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12711 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12712 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12713 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12714 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12715 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12717 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12718 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12719 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12720 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12722 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12723 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12726 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12729 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12736 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12737 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12738 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12739 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12740 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12741 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12742 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12743 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12744 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12745 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12746 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12747 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12748 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12749 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12750 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12751 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12752 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12753 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12754 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12756 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12758 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12759 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12762 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12763 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12764 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12768 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12771 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12772 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12773 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12774 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12775 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12776 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12777 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12780 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12781 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12782 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12783 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12784 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12785 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12786 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12787 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12790 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12795 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12796 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12802 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12804 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12812 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12821 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12832 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12837 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12839 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12840 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12841 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12842 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12844 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12852 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12857 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12858 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12859 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12860 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12861 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12862 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12865 static const struct builtin_description bdesc_1arg[] =
12867 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12868 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12870 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12871 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12872 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12874 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12875 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12876 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12877 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12878 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12879 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12901 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12902 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12911 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12912 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12913 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12917 ix86_init_builtins (void)
12920 ix86_init_mmx_sse_builtins ();
12923 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12924 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12927 ix86_init_mmx_sse_builtins (void)
12929 const struct builtin_description * d;
12932 tree pchar_type_node = build_pointer_type (char_type_node);
12933 tree pcchar_type_node = build_pointer_type (
12934 build_type_variant (char_type_node, 1, 0));
12935 tree pfloat_type_node = build_pointer_type (float_type_node);
12936 tree pcfloat_type_node = build_pointer_type (
12937 build_type_variant (float_type_node, 1, 0));
12938 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12939 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12940 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12943 tree int_ftype_v4sf_v4sf
12944 = build_function_type_list (integer_type_node,
12945 V4SF_type_node, V4SF_type_node, NULL_TREE);
12946 tree v4si_ftype_v4sf_v4sf
12947 = build_function_type_list (V4SI_type_node,
12948 V4SF_type_node, V4SF_type_node, NULL_TREE);
12949 /* MMX/SSE/integer conversions. */
12950 tree int_ftype_v4sf
12951 = build_function_type_list (integer_type_node,
12952 V4SF_type_node, NULL_TREE);
12953 tree int64_ftype_v4sf
12954 = build_function_type_list (long_long_integer_type_node,
12955 V4SF_type_node, NULL_TREE);
12956 tree int_ftype_v8qi
12957 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12958 tree v4sf_ftype_v4sf_int
12959 = build_function_type_list (V4SF_type_node,
12960 V4SF_type_node, integer_type_node, NULL_TREE);
12961 tree v4sf_ftype_v4sf_int64
12962 = build_function_type_list (V4SF_type_node,
12963 V4SF_type_node, long_long_integer_type_node,
12965 tree v4sf_ftype_v4sf_v2si
12966 = build_function_type_list (V4SF_type_node,
12967 V4SF_type_node, V2SI_type_node, NULL_TREE);
12968 tree int_ftype_v4hi_int
12969 = build_function_type_list (integer_type_node,
12970 V4HI_type_node, integer_type_node, NULL_TREE);
12971 tree v4hi_ftype_v4hi_int_int
12972 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12973 integer_type_node, integer_type_node,
12975 /* Miscellaneous. */
12976 tree v8qi_ftype_v4hi_v4hi
12977 = build_function_type_list (V8QI_type_node,
12978 V4HI_type_node, V4HI_type_node, NULL_TREE);
12979 tree v4hi_ftype_v2si_v2si
12980 = build_function_type_list (V4HI_type_node,
12981 V2SI_type_node, V2SI_type_node, NULL_TREE);
12982 tree v4sf_ftype_v4sf_v4sf_int
12983 = build_function_type_list (V4SF_type_node,
12984 V4SF_type_node, V4SF_type_node,
12985 integer_type_node, NULL_TREE);
12986 tree v2si_ftype_v4hi_v4hi
12987 = build_function_type_list (V2SI_type_node,
12988 V4HI_type_node, V4HI_type_node, NULL_TREE);
12989 tree v4hi_ftype_v4hi_int
12990 = build_function_type_list (V4HI_type_node,
12991 V4HI_type_node, integer_type_node, NULL_TREE);
12992 tree v4hi_ftype_v4hi_di
12993 = build_function_type_list (V4HI_type_node,
12994 V4HI_type_node, long_long_unsigned_type_node,
12996 tree v2si_ftype_v2si_di
12997 = build_function_type_list (V2SI_type_node,
12998 V2SI_type_node, long_long_unsigned_type_node,
13000 tree void_ftype_void
13001 = build_function_type (void_type_node, void_list_node);
13002 tree void_ftype_unsigned
13003 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13004 tree void_ftype_unsigned_unsigned
13005 = build_function_type_list (void_type_node, unsigned_type_node,
13006 unsigned_type_node, NULL_TREE);
13007 tree void_ftype_pcvoid_unsigned_unsigned
13008 = build_function_type_list (void_type_node, const_ptr_type_node,
13009 unsigned_type_node, unsigned_type_node,
13011 tree unsigned_ftype_void
13012 = build_function_type (unsigned_type_node, void_list_node);
13014 = build_function_type (long_long_unsigned_type_node, void_list_node);
13015 tree v4sf_ftype_void
13016 = build_function_type (V4SF_type_node, void_list_node);
13017 tree v2si_ftype_v4sf
13018 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13019 /* Loads/stores. */
13020 tree void_ftype_v8qi_v8qi_pchar
13021 = build_function_type_list (void_type_node,
13022 V8QI_type_node, V8QI_type_node,
13023 pchar_type_node, NULL_TREE);
13024 tree v4sf_ftype_pcfloat
13025 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13026 /* @@@ the type is bogus */
13027 tree v4sf_ftype_v4sf_pv2si
13028 = build_function_type_list (V4SF_type_node,
13029 V4SF_type_node, pv2si_type_node, NULL_TREE);
13030 tree void_ftype_pv2si_v4sf
13031 = build_function_type_list (void_type_node,
13032 pv2si_type_node, V4SF_type_node, NULL_TREE);
13033 tree void_ftype_pfloat_v4sf
13034 = build_function_type_list (void_type_node,
13035 pfloat_type_node, V4SF_type_node, NULL_TREE);
13036 tree void_ftype_pdi_di
13037 = build_function_type_list (void_type_node,
13038 pdi_type_node, long_long_unsigned_type_node,
13040 tree void_ftype_pv2di_v2di
13041 = build_function_type_list (void_type_node,
13042 pv2di_type_node, V2DI_type_node, NULL_TREE);
13043 /* Normal vector unops. */
13044 tree v4sf_ftype_v4sf
13045 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13047 /* Normal vector binops. */
13048 tree v4sf_ftype_v4sf_v4sf
13049 = build_function_type_list (V4SF_type_node,
13050 V4SF_type_node, V4SF_type_node, NULL_TREE);
13051 tree v8qi_ftype_v8qi_v8qi
13052 = build_function_type_list (V8QI_type_node,
13053 V8QI_type_node, V8QI_type_node, NULL_TREE);
13054 tree v4hi_ftype_v4hi_v4hi
13055 = build_function_type_list (V4HI_type_node,
13056 V4HI_type_node, V4HI_type_node, NULL_TREE);
13057 tree v2si_ftype_v2si_v2si
13058 = build_function_type_list (V2SI_type_node,
13059 V2SI_type_node, V2SI_type_node, NULL_TREE);
13060 tree di_ftype_di_di
13061 = build_function_type_list (long_long_unsigned_type_node,
13062 long_long_unsigned_type_node,
13063 long_long_unsigned_type_node, NULL_TREE);
13065 tree v2si_ftype_v2sf
13066 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13067 tree v2sf_ftype_v2si
13068 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13069 tree v2si_ftype_v2si
13070 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13071 tree v2sf_ftype_v2sf
13072 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13073 tree v2sf_ftype_v2sf_v2sf
13074 = build_function_type_list (V2SF_type_node,
13075 V2SF_type_node, V2SF_type_node, NULL_TREE);
13076 tree v2si_ftype_v2sf_v2sf
13077 = build_function_type_list (V2SI_type_node,
13078 V2SF_type_node, V2SF_type_node, NULL_TREE);
13079 tree pint_type_node = build_pointer_type (integer_type_node);
13080 tree pcint_type_node = build_pointer_type (
13081 build_type_variant (integer_type_node, 1, 0));
13082 tree pdouble_type_node = build_pointer_type (double_type_node);
13083 tree pcdouble_type_node = build_pointer_type (
13084 build_type_variant (double_type_node, 1, 0));
13085 tree int_ftype_v2df_v2df
13086 = build_function_type_list (integer_type_node,
13087 V2DF_type_node, V2DF_type_node, NULL_TREE);
13090 = build_function_type (intTI_type_node, void_list_node);
13091 tree v2di_ftype_void
13092 = build_function_type (V2DI_type_node, void_list_node);
13093 tree ti_ftype_ti_ti
13094 = build_function_type_list (intTI_type_node,
13095 intTI_type_node, intTI_type_node, NULL_TREE);
13096 tree void_ftype_pcvoid
13097 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13099 = build_function_type_list (V2DI_type_node,
13100 long_long_unsigned_type_node, NULL_TREE);
13102 = build_function_type_list (long_long_unsigned_type_node,
13103 V2DI_type_node, NULL_TREE);
13104 tree v4sf_ftype_v4si
13105 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13106 tree v4si_ftype_v4sf
13107 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13108 tree v2df_ftype_v4si
13109 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13110 tree v4si_ftype_v2df
13111 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13112 tree v2si_ftype_v2df
13113 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13114 tree v4sf_ftype_v2df
13115 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13116 tree v2df_ftype_v2si
13117 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13118 tree v2df_ftype_v4sf
13119 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13120 tree int_ftype_v2df
13121 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13122 tree int64_ftype_v2df
13123 = build_function_type_list (long_long_integer_type_node,
13124 V2DF_type_node, NULL_TREE);
13125 tree v2df_ftype_v2df_int
13126 = build_function_type_list (V2DF_type_node,
13127 V2DF_type_node, integer_type_node, NULL_TREE);
13128 tree v2df_ftype_v2df_int64
13129 = build_function_type_list (V2DF_type_node,
13130 V2DF_type_node, long_long_integer_type_node,
13132 tree v4sf_ftype_v4sf_v2df
13133 = build_function_type_list (V4SF_type_node,
13134 V4SF_type_node, V2DF_type_node, NULL_TREE);
13135 tree v2df_ftype_v2df_v4sf
13136 = build_function_type_list (V2DF_type_node,
13137 V2DF_type_node, V4SF_type_node, NULL_TREE);
13138 tree v2df_ftype_v2df_v2df_int
13139 = build_function_type_list (V2DF_type_node,
13140 V2DF_type_node, V2DF_type_node,
13143 tree v2df_ftype_v2df_pv2si
13144 = build_function_type_list (V2DF_type_node,
13145 V2DF_type_node, pv2si_type_node, NULL_TREE);
13146 tree void_ftype_pv2si_v2df
13147 = build_function_type_list (void_type_node,
13148 pv2si_type_node, V2DF_type_node, NULL_TREE);
13149 tree void_ftype_pdouble_v2df
13150 = build_function_type_list (void_type_node,
13151 pdouble_type_node, V2DF_type_node, NULL_TREE);
13152 tree void_ftype_pint_int
13153 = build_function_type_list (void_type_node,
13154 pint_type_node, integer_type_node, NULL_TREE);
13155 tree void_ftype_v16qi_v16qi_pchar
13156 = build_function_type_list (void_type_node,
13157 V16QI_type_node, V16QI_type_node,
13158 pchar_type_node, NULL_TREE);
13159 tree v2df_ftype_pcdouble
13160 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13161 tree v2df_ftype_v2df_v2df
13162 = build_function_type_list (V2DF_type_node,
13163 V2DF_type_node, V2DF_type_node, NULL_TREE);
13164 tree v16qi_ftype_v16qi_v16qi
13165 = build_function_type_list (V16QI_type_node,
13166 V16QI_type_node, V16QI_type_node, NULL_TREE);
13167 tree v8hi_ftype_v8hi_v8hi
13168 = build_function_type_list (V8HI_type_node,
13169 V8HI_type_node, V8HI_type_node, NULL_TREE);
13170 tree v4si_ftype_v4si_v4si
13171 = build_function_type_list (V4SI_type_node,
13172 V4SI_type_node, V4SI_type_node, NULL_TREE);
13173 tree v2di_ftype_v2di_v2di
13174 = build_function_type_list (V2DI_type_node,
13175 V2DI_type_node, V2DI_type_node, NULL_TREE);
13176 tree v2di_ftype_v2df_v2df
13177 = build_function_type_list (V2DI_type_node,
13178 V2DF_type_node, V2DF_type_node, NULL_TREE);
13179 tree v2df_ftype_v2df
13180 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13181 tree v2df_ftype_double
13182 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13183 tree v2df_ftype_double_double
13184 = build_function_type_list (V2DF_type_node,
13185 double_type_node, double_type_node, NULL_TREE);
13186 tree int_ftype_v8hi_int
13187 = build_function_type_list (integer_type_node,
13188 V8HI_type_node, integer_type_node, NULL_TREE);
13189 tree v8hi_ftype_v8hi_int_int
13190 = build_function_type_list (V8HI_type_node,
13191 V8HI_type_node, integer_type_node,
13192 integer_type_node, NULL_TREE);
13193 tree v2di_ftype_v2di_int
13194 = build_function_type_list (V2DI_type_node,
13195 V2DI_type_node, integer_type_node, NULL_TREE);
13196 tree v4si_ftype_v4si_int
13197 = build_function_type_list (V4SI_type_node,
13198 V4SI_type_node, integer_type_node, NULL_TREE);
13199 tree v8hi_ftype_v8hi_int
13200 = build_function_type_list (V8HI_type_node,
13201 V8HI_type_node, integer_type_node, NULL_TREE);
13202 tree v8hi_ftype_v8hi_v2di
13203 = build_function_type_list (V8HI_type_node,
13204 V8HI_type_node, V2DI_type_node, NULL_TREE);
13205 tree v4si_ftype_v4si_v2di
13206 = build_function_type_list (V4SI_type_node,
13207 V4SI_type_node, V2DI_type_node, NULL_TREE);
13208 tree v4si_ftype_v8hi_v8hi
13209 = build_function_type_list (V4SI_type_node,
13210 V8HI_type_node, V8HI_type_node, NULL_TREE);
13211 tree di_ftype_v8qi_v8qi
13212 = build_function_type_list (long_long_unsigned_type_node,
13213 V8QI_type_node, V8QI_type_node, NULL_TREE);
13214 tree v2di_ftype_v16qi_v16qi
13215 = build_function_type_list (V2DI_type_node,
13216 V16QI_type_node, V16QI_type_node, NULL_TREE);
13217 tree int_ftype_v16qi
13218 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13219 tree v16qi_ftype_pcchar
13220 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13221 tree void_ftype_pchar_v16qi
13222 = build_function_type_list (void_type_node,
13223 pchar_type_node, V16QI_type_node, NULL_TREE);
13224 tree v4si_ftype_pcint
13225 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13226 tree void_ftype_pcint_v4si
13227 = build_function_type_list (void_type_node,
13228 pcint_type_node, V4SI_type_node, NULL_TREE);
13229 tree v2di_ftype_v2di
13230 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13232 /* Add all builtins that are more or less simple operations on two
13234 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13236 /* Use one of the operands; the target can have a different mode for
13237 mask-generating compares. */
13238 enum machine_mode mode;
13243 mode = insn_data[d->icode].operand[1].mode;
13248 type = v16qi_ftype_v16qi_v16qi;
13251 type = v8hi_ftype_v8hi_v8hi;
13254 type = v4si_ftype_v4si_v4si;
13257 type = v2di_ftype_v2di_v2di;
13260 type = v2df_ftype_v2df_v2df;
13263 type = ti_ftype_ti_ti;
13266 type = v4sf_ftype_v4sf_v4sf;
13269 type = v8qi_ftype_v8qi_v8qi;
13272 type = v4hi_ftype_v4hi_v4hi;
13275 type = v2si_ftype_v2si_v2si;
13278 type = di_ftype_di_di;
13285 /* Override for comparisons. */
13286 if (d->icode == CODE_FOR_maskcmpv4sf3
13287 || d->icode == CODE_FOR_maskncmpv4sf3
13288 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13289 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13290 type = v4si_ftype_v4sf_v4sf;
13292 if (d->icode == CODE_FOR_maskcmpv2df3
13293 || d->icode == CODE_FOR_maskncmpv2df3
13294 || d->icode == CODE_FOR_vmmaskcmpv2df3
13295 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13296 type = v2di_ftype_v2df_v2df;
13298 def_builtin (d->mask, d->name, type, d->code);
13301 /* Add the remaining MMX insns with somewhat more complicated types. */
13302 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13303 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13304 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13305 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13306 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13308 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13309 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13310 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13312 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13313 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13315 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13316 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13318 /* comi/ucomi insns. */
13319 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13320 if (d->mask == MASK_SSE2)
13321 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13323 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13325 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13326 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13327 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13329 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13330 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13331 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13332 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13333 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13334 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13335 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13336 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13337 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13338 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13339 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13341 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13342 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13344 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13346 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13347 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13348 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13349 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13350 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13351 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13353 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13354 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13355 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13356 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13358 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13359 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13360 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13361 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13363 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13365 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13367 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13368 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13369 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13370 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13371 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13372 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13374 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13376 /* Original 3DNow! */
13377 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13378 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13379 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13380 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13381 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13382 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13383 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13384 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13385 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13386 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13387 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13388 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13389 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13390 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13391 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13392 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13393 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13394 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13395 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13396 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13398 /* 3DNow! extension as used in the Athlon CPU. */
13399 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13400 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13401 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13402 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13403 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13404 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13406 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13409 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13410 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13412 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13413 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13414 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13416 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13417 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13418 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13419 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13420 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13421 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13423 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13424 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13425 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13426 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13428 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13429 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13430 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13431 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13432 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13434 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13435 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13436 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13437 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13439 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13440 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13442 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13444 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13445 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13447 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13448 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13449 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13450 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13451 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13453 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13455 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13456 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13457 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13458 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13460 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13461 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13462 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13464 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13465 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13466 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13467 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13469 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13470 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13471 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13472 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13473 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13474 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13475 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13477 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13478 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13479 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13481 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13482 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13483 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13484 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13485 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13486 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13487 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13489 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13491 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13492 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13493 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13495 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13496 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13497 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13499 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13500 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13505 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13510 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13515 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13517 /* Prescott New Instructions. */
13518 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13519 void_ftype_pcvoid_unsigned_unsigned,
13520 IX86_BUILTIN_MONITOR);
13521 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13522 void_ftype_unsigned_unsigned,
13523 IX86_BUILTIN_MWAIT);
13524 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13526 IX86_BUILTIN_MOVSHDUP);
13527 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13529 IX86_BUILTIN_MOVSLDUP);
13530 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13531 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13532 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13533 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13534 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13535 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13538 /* Errors in the source file can cause expand_expr to return const0_rtx
13539 where we expect a vector. To avoid crashing, use one of the vector
13540 clear instructions. */
13542 safe_vector_operand (rtx x, enum machine_mode mode)
13544 if (x != const0_rtx)
13546 x = gen_reg_rtx (mode);
13548 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13549 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13550 : gen_rtx_SUBREG (DImode, x, 0)));
13552 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13553 : gen_rtx_SUBREG (V4SFmode, x, 0),
13554 CONST0_RTX (V4SFmode)));
13558 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13561 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13564 tree arg0 = TREE_VALUE (arglist);
13565 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13566 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13567 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13568 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13569 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13570 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13572 if (VECTOR_MODE_P (mode0))
13573 op0 = safe_vector_operand (op0, mode0);
13574 if (VECTOR_MODE_P (mode1))
13575 op1 = safe_vector_operand (op1, mode1);
13578 || GET_MODE (target) != tmode
13579 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13580 target = gen_reg_rtx (tmode);
13582 if (GET_MODE (op1) == SImode && mode1 == TImode)
13584 rtx x = gen_reg_rtx (V4SImode);
13585 emit_insn (gen_sse2_loadd (x, op1));
13586 op1 = gen_lowpart (TImode, x);
13589 /* In case the insn wants input operands in modes different from
13590 the result, abort. */
13591 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13592 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13595 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13596 op0 = copy_to_mode_reg (mode0, op0);
13597 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13598 op1 = copy_to_mode_reg (mode1, op1);
13600 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13601 yet one of the two must not be a memory. This is normally enforced
13602 by expanders, but we didn't bother to create one here. */
13603 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13604 op0 = copy_to_mode_reg (mode0, op0);
13606 pat = GEN_FCN (icode) (target, op0, op1);
13613 /* Subroutine of ix86_expand_builtin to take care of stores. */
13616 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13619 tree arg0 = TREE_VALUE (arglist);
13620 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13621 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13622 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13623 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13624 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13626 if (VECTOR_MODE_P (mode1))
13627 op1 = safe_vector_operand (op1, mode1);
13629 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13630 op1 = copy_to_mode_reg (mode1, op1);
13632 pat = GEN_FCN (icode) (op0, op1);
13638 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13641 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13642 rtx target, int do_load)
13645 tree arg0 = TREE_VALUE (arglist);
13646 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13647 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13648 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13651 || GET_MODE (target) != tmode
13652 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13653 target = gen_reg_rtx (tmode);
13655 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13658 if (VECTOR_MODE_P (mode0))
13659 op0 = safe_vector_operand (op0, mode0);
13661 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13662 op0 = copy_to_mode_reg (mode0, op0);
13665 pat = GEN_FCN (icode) (target, op0);
13672 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13673 sqrtss, rsqrtss, rcpss. */
13676 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13679 tree arg0 = TREE_VALUE (arglist);
13680 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13681 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13682 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13685 || GET_MODE (target) != tmode
13686 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13687 target = gen_reg_rtx (tmode);
13689 if (VECTOR_MODE_P (mode0))
13690 op0 = safe_vector_operand (op0, mode0);
13692 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13693 op0 = copy_to_mode_reg (mode0, op0);
13696 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13697 op1 = copy_to_mode_reg (mode0, op1);
13699 pat = GEN_FCN (icode) (target, op0, op1);
13706 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13709 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13713 tree arg0 = TREE_VALUE (arglist);
13714 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13715 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13716 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13718 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13719 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13720 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13721 enum rtx_code comparison = d->comparison;
13723 if (VECTOR_MODE_P (mode0))
13724 op0 = safe_vector_operand (op0, mode0);
13725 if (VECTOR_MODE_P (mode1))
13726 op1 = safe_vector_operand (op1, mode1);
13728 /* Swap operands if we have a comparison that isn't available in
13732 rtx tmp = gen_reg_rtx (mode1);
13733 emit_move_insn (tmp, op1);
13739 || GET_MODE (target) != tmode
13740 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13741 target = gen_reg_rtx (tmode);
13743 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13744 op0 = copy_to_mode_reg (mode0, op0);
13745 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13746 op1 = copy_to_mode_reg (mode1, op1);
13748 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13749 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13756 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13759 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13763 tree arg0 = TREE_VALUE (arglist);
13764 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13765 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13766 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13768 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13769 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13770 enum rtx_code comparison = d->comparison;
13772 if (VECTOR_MODE_P (mode0))
13773 op0 = safe_vector_operand (op0, mode0);
13774 if (VECTOR_MODE_P (mode1))
13775 op1 = safe_vector_operand (op1, mode1);
13777 /* Swap operands if we have a comparison that isn't available in
13786 target = gen_reg_rtx (SImode);
13787 emit_move_insn (target, const0_rtx);
13788 target = gen_rtx_SUBREG (QImode, target, 0);
13790 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13791 op0 = copy_to_mode_reg (mode0, op0);
13792 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13793 op1 = copy_to_mode_reg (mode1, op1);
13795 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13796 pat = GEN_FCN (d->icode) (op0, op1);
13800 emit_insn (gen_rtx_SET (VOIDmode,
13801 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13802 gen_rtx_fmt_ee (comparison, QImode,
13806 return SUBREG_REG (target);
13809 /* Expand an expression EXP that calls a built-in function,
13810 with result going to TARGET if that's convenient
13811 (and in mode MODE if that's convenient).
13812 SUBTARGET may be used as the target for computing one of EXP's operands.
13813 IGNORE is nonzero if the value is to be ignored. */
13816 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13817 enum machine_mode mode ATTRIBUTE_UNUSED,
13818 int ignore ATTRIBUTE_UNUSED)
13820 const struct builtin_description *d;
13822 enum insn_code icode;
13823 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13824 tree arglist = TREE_OPERAND (exp, 1);
13825 tree arg0, arg1, arg2;
13826 rtx op0, op1, op2, pat;
13827 enum machine_mode tmode, mode0, mode1, mode2;
13828 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13832 case IX86_BUILTIN_EMMS:
13833 emit_insn (gen_emms ());
13836 case IX86_BUILTIN_SFENCE:
13837 emit_insn (gen_sfence ());
13840 case IX86_BUILTIN_PEXTRW:
13841 case IX86_BUILTIN_PEXTRW128:
13842 icode = (fcode == IX86_BUILTIN_PEXTRW
13843 ? CODE_FOR_mmx_pextrw
13844 : CODE_FOR_sse2_pextrw);
13845 arg0 = TREE_VALUE (arglist);
13846 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13847 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13848 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13849 tmode = insn_data[icode].operand[0].mode;
13850 mode0 = insn_data[icode].operand[1].mode;
13851 mode1 = insn_data[icode].operand[2].mode;
13853 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13854 op0 = copy_to_mode_reg (mode0, op0);
13855 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13857 error ("selector must be an integer constant in the range 0..%i",
13858 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13859 return gen_reg_rtx (tmode);
13862 || GET_MODE (target) != tmode
13863 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13864 target = gen_reg_rtx (tmode);
13865 pat = GEN_FCN (icode) (target, op0, op1);
13871 case IX86_BUILTIN_PINSRW:
13872 case IX86_BUILTIN_PINSRW128:
13873 icode = (fcode == IX86_BUILTIN_PINSRW
13874 ? CODE_FOR_mmx_pinsrw
13875 : CODE_FOR_sse2_pinsrw);
13876 arg0 = TREE_VALUE (arglist);
13877 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13878 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13879 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13880 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13881 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13882 tmode = insn_data[icode].operand[0].mode;
13883 mode0 = insn_data[icode].operand[1].mode;
13884 mode1 = insn_data[icode].operand[2].mode;
13885 mode2 = insn_data[icode].operand[3].mode;
13887 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13888 op0 = copy_to_mode_reg (mode0, op0);
13889 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13890 op1 = copy_to_mode_reg (mode1, op1);
13891 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13893 error ("selector must be an integer constant in the range 0..%i",
13894 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13898 || GET_MODE (target) != tmode
13899 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13900 target = gen_reg_rtx (tmode);
13901 pat = GEN_FCN (icode) (target, op0, op1, op2);
13907 case IX86_BUILTIN_MASKMOVQ:
13908 case IX86_BUILTIN_MASKMOVDQU:
13909 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13910 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13911 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13912 : CODE_FOR_sse2_maskmovdqu));
13913 /* Note the arg order is different from the operand order. */
13914 arg1 = TREE_VALUE (arglist);
13915 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13916 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13917 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13918 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13919 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13920 mode0 = insn_data[icode].operand[0].mode;
13921 mode1 = insn_data[icode].operand[1].mode;
13922 mode2 = insn_data[icode].operand[2].mode;
13924 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13925 op0 = copy_to_mode_reg (mode0, op0);
13926 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13927 op1 = copy_to_mode_reg (mode1, op1);
13928 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13929 op2 = copy_to_mode_reg (mode2, op2);
13930 pat = GEN_FCN (icode) (op0, op1, op2);
13936 case IX86_BUILTIN_SQRTSS:
13937 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13938 case IX86_BUILTIN_RSQRTSS:
13939 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13940 case IX86_BUILTIN_RCPSS:
13941 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13943 case IX86_BUILTIN_LOADAPS:
13944 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13946 case IX86_BUILTIN_LOADUPS:
13947 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13949 case IX86_BUILTIN_STOREAPS:
13950 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13952 case IX86_BUILTIN_STOREUPS:
13953 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13955 case IX86_BUILTIN_LOADSS:
13956 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13958 case IX86_BUILTIN_STORESS:
13959 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13961 case IX86_BUILTIN_LOADHPS:
13962 case IX86_BUILTIN_LOADLPS:
13963 case IX86_BUILTIN_LOADHPD:
13964 case IX86_BUILTIN_LOADLPD:
13965 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13966 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13967 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13968 : CODE_FOR_sse2_movlpd);
13969 arg0 = TREE_VALUE (arglist);
13970 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13971 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13972 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13973 tmode = insn_data[icode].operand[0].mode;
13974 mode0 = insn_data[icode].operand[1].mode;
13975 mode1 = insn_data[icode].operand[2].mode;
13977 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13978 op0 = copy_to_mode_reg (mode0, op0);
13979 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13981 || GET_MODE (target) != tmode
13982 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13983 target = gen_reg_rtx (tmode);
13984 pat = GEN_FCN (icode) (target, op0, op1);
13990 case IX86_BUILTIN_STOREHPS:
13991 case IX86_BUILTIN_STORELPS:
13992 case IX86_BUILTIN_STOREHPD:
13993 case IX86_BUILTIN_STORELPD:
13994 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13995 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13996 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13997 : CODE_FOR_sse2_movlpd);
13998 arg0 = TREE_VALUE (arglist);
13999 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14001 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14002 mode0 = insn_data[icode].operand[1].mode;
14003 mode1 = insn_data[icode].operand[2].mode;
14005 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14006 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14007 op1 = copy_to_mode_reg (mode1, op1);
14009 pat = GEN_FCN (icode) (op0, op0, op1);
14015 case IX86_BUILTIN_MOVNTPS:
14016 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14017 case IX86_BUILTIN_MOVNTQ:
14018 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14020 case IX86_BUILTIN_LDMXCSR:
14021 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14022 target = assign_386_stack_local (SImode, 0);
14023 emit_move_insn (target, op0);
14024 emit_insn (gen_ldmxcsr (target));
14027 case IX86_BUILTIN_STMXCSR:
14028 target = assign_386_stack_local (SImode, 0);
14029 emit_insn (gen_stmxcsr (target));
14030 return copy_to_mode_reg (SImode, target);
14032 case IX86_BUILTIN_SHUFPS:
14033 case IX86_BUILTIN_SHUFPD:
14034 icode = (fcode == IX86_BUILTIN_SHUFPS
14035 ? CODE_FOR_sse_shufps
14036 : CODE_FOR_sse2_shufpd);
14037 arg0 = TREE_VALUE (arglist);
14038 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14039 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14040 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14041 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14042 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14043 tmode = insn_data[icode].operand[0].mode;
14044 mode0 = insn_data[icode].operand[1].mode;
14045 mode1 = insn_data[icode].operand[2].mode;
14046 mode2 = insn_data[icode].operand[3].mode;
14048 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14049 op0 = copy_to_mode_reg (mode0, op0);
14050 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14051 op1 = copy_to_mode_reg (mode1, op1);
14052 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14054 /* @@@ better error message */
14055 error ("mask must be an immediate");
14056 return gen_reg_rtx (tmode);
14059 || GET_MODE (target) != tmode
14060 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14061 target = gen_reg_rtx (tmode);
14062 pat = GEN_FCN (icode) (target, op0, op1, op2);
14068 case IX86_BUILTIN_PSHUFW:
14069 case IX86_BUILTIN_PSHUFD:
14070 case IX86_BUILTIN_PSHUFHW:
14071 case IX86_BUILTIN_PSHUFLW:
14072 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14073 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14074 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14075 : CODE_FOR_mmx_pshufw);
14076 arg0 = TREE_VALUE (arglist);
14077 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14078 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14079 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14080 tmode = insn_data[icode].operand[0].mode;
14081 mode1 = insn_data[icode].operand[1].mode;
14082 mode2 = insn_data[icode].operand[2].mode;
14084 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14085 op0 = copy_to_mode_reg (mode1, op0);
14086 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14088 /* @@@ better error message */
14089 error ("mask must be an immediate");
14093 || GET_MODE (target) != tmode
14094 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14095 target = gen_reg_rtx (tmode);
14096 pat = GEN_FCN (icode) (target, op0, op1);
14102 case IX86_BUILTIN_PSLLDQI128:
14103 case IX86_BUILTIN_PSRLDQI128:
14104 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14105 : CODE_FOR_sse2_lshrti3);
14106 arg0 = TREE_VALUE (arglist);
14107 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14108 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14109 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14110 tmode = insn_data[icode].operand[0].mode;
14111 mode1 = insn_data[icode].operand[1].mode;
14112 mode2 = insn_data[icode].operand[2].mode;
14114 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14116 op0 = copy_to_reg (op0);
14117 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14119 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14121 error ("shift must be an immediate");
14124 target = gen_reg_rtx (V2DImode);
14125 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14131 case IX86_BUILTIN_FEMMS:
14132 emit_insn (gen_femms ());
14135 case IX86_BUILTIN_PAVGUSB:
14136 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14138 case IX86_BUILTIN_PF2ID:
14139 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14141 case IX86_BUILTIN_PFACC:
14142 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14144 case IX86_BUILTIN_PFADD:
14145 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14147 case IX86_BUILTIN_PFCMPEQ:
14148 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14150 case IX86_BUILTIN_PFCMPGE:
14151 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14153 case IX86_BUILTIN_PFCMPGT:
14154 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14156 case IX86_BUILTIN_PFMAX:
14157 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14159 case IX86_BUILTIN_PFMIN:
14160 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14162 case IX86_BUILTIN_PFMUL:
14163 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14165 case IX86_BUILTIN_PFRCP:
14166 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14168 case IX86_BUILTIN_PFRCPIT1:
14169 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14171 case IX86_BUILTIN_PFRCPIT2:
14172 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14174 case IX86_BUILTIN_PFRSQIT1:
14175 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14177 case IX86_BUILTIN_PFRSQRT:
14178 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14180 case IX86_BUILTIN_PFSUB:
14181 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14183 case IX86_BUILTIN_PFSUBR:
14184 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14186 case IX86_BUILTIN_PI2FD:
14187 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14189 case IX86_BUILTIN_PMULHRW:
14190 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14192 case IX86_BUILTIN_PF2IW:
14193 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14195 case IX86_BUILTIN_PFNACC:
14196 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14198 case IX86_BUILTIN_PFPNACC:
14199 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14201 case IX86_BUILTIN_PI2FW:
14202 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14204 case IX86_BUILTIN_PSWAPDSI:
14205 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14207 case IX86_BUILTIN_PSWAPDSF:
14208 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14210 case IX86_BUILTIN_SSE_ZERO:
14211 target = gen_reg_rtx (V4SFmode);
14212 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14215 case IX86_BUILTIN_MMX_ZERO:
14216 target = gen_reg_rtx (DImode);
14217 emit_insn (gen_mmx_clrdi (target));
14220 case IX86_BUILTIN_CLRTI:
14221 target = gen_reg_rtx (V2DImode);
14222 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14226 case IX86_BUILTIN_SQRTSD:
14227 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14228 case IX86_BUILTIN_LOADAPD:
14229 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14230 case IX86_BUILTIN_LOADUPD:
14231 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14233 case IX86_BUILTIN_STOREAPD:
14234 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14235 case IX86_BUILTIN_STOREUPD:
14236 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14238 case IX86_BUILTIN_LOADSD:
14239 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14241 case IX86_BUILTIN_STORESD:
14242 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14244 case IX86_BUILTIN_SETPD1:
14245 target = assign_386_stack_local (DFmode, 0);
14246 arg0 = TREE_VALUE (arglist);
14247 emit_move_insn (adjust_address (target, DFmode, 0),
14248 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14249 op0 = gen_reg_rtx (V2DFmode);
14250 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14251 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14254 case IX86_BUILTIN_SETPD:
14255 target = assign_386_stack_local (V2DFmode, 0);
14256 arg0 = TREE_VALUE (arglist);
14257 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14258 emit_move_insn (adjust_address (target, DFmode, 0),
14259 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14260 emit_move_insn (adjust_address (target, DFmode, 8),
14261 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14262 op0 = gen_reg_rtx (V2DFmode);
14263 emit_insn (gen_sse2_movapd (op0, target));
14266 case IX86_BUILTIN_LOADRPD:
14267 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14268 gen_reg_rtx (V2DFmode), 1);
14269 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14272 case IX86_BUILTIN_LOADPD1:
14273 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14274 gen_reg_rtx (V2DFmode), 1);
14275 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14278 case IX86_BUILTIN_STOREPD1:
14279 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14280 case IX86_BUILTIN_STORERPD:
14281 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14283 case IX86_BUILTIN_CLRPD:
14284 target = gen_reg_rtx (V2DFmode);
14285 emit_insn (gen_sse_clrv2df (target));
14288 case IX86_BUILTIN_MFENCE:
14289 emit_insn (gen_sse2_mfence ());
14291 case IX86_BUILTIN_LFENCE:
14292 emit_insn (gen_sse2_lfence ());
14295 case IX86_BUILTIN_CLFLUSH:
14296 arg0 = TREE_VALUE (arglist);
14297 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14298 icode = CODE_FOR_sse2_clflush;
14299 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14300 op0 = copy_to_mode_reg (Pmode, op0);
14302 emit_insn (gen_sse2_clflush (op0));
14305 case IX86_BUILTIN_MOVNTPD:
14306 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14307 case IX86_BUILTIN_MOVNTDQ:
14308 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14309 case IX86_BUILTIN_MOVNTI:
14310 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14312 case IX86_BUILTIN_LOADDQA:
14313 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14314 case IX86_BUILTIN_LOADDQU:
14315 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14316 case IX86_BUILTIN_LOADD:
14317 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14319 case IX86_BUILTIN_STOREDQA:
14320 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14321 case IX86_BUILTIN_STOREDQU:
14322 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14323 case IX86_BUILTIN_STORED:
14324 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14326 case IX86_BUILTIN_MONITOR:
14327 arg0 = TREE_VALUE (arglist);
14328 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14329 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14330 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14331 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14332 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14334 op0 = copy_to_mode_reg (SImode, op0);
14336 op1 = copy_to_mode_reg (SImode, op1);
14338 op2 = copy_to_mode_reg (SImode, op2);
14339 emit_insn (gen_monitor (op0, op1, op2));
14342 case IX86_BUILTIN_MWAIT:
14343 arg0 = TREE_VALUE (arglist);
14344 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14345 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14346 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14348 op0 = copy_to_mode_reg (SImode, op0);
14350 op1 = copy_to_mode_reg (SImode, op1);
14351 emit_insn (gen_mwait (op0, op1));
14354 case IX86_BUILTIN_LOADDDUP:
14355 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14357 case IX86_BUILTIN_LDDQU:
14358 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14365 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14366 if (d->code == fcode)
14368 /* Compares are treated specially. */
14369 if (d->icode == CODE_FOR_maskcmpv4sf3
14370 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14371 || d->icode == CODE_FOR_maskncmpv4sf3
14372 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14373 || d->icode == CODE_FOR_maskcmpv2df3
14374 || d->icode == CODE_FOR_vmmaskcmpv2df3
14375 || d->icode == CODE_FOR_maskncmpv2df3
14376 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14377 return ix86_expand_sse_compare (d, arglist, target);
14379 return ix86_expand_binop_builtin (d->icode, arglist, target);
14382 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14383 if (d->code == fcode)
14384 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14386 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14387 if (d->code == fcode)
14388 return ix86_expand_sse_comi (d, arglist, target);
14390 /* @@@ Should really do something sensible here. */
14394 /* Store OPERAND to the memory after reload is completed. This means
14395 that we can't easily use assign_stack_local. */
14397 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14400 if (!reload_completed)
14402 if (TARGET_RED_ZONE)
14404 result = gen_rtx_MEM (mode,
14405 gen_rtx_PLUS (Pmode,
14407 GEN_INT (-RED_ZONE_SIZE)));
14408 emit_move_insn (result, operand);
14410 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14416 operand = gen_lowpart (DImode, operand);
14420 gen_rtx_SET (VOIDmode,
14421 gen_rtx_MEM (DImode,
14422 gen_rtx_PRE_DEC (DImode,
14423 stack_pointer_rtx)),
14429 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14438 split_di (&operand, 1, operands, operands + 1);
14440 gen_rtx_SET (VOIDmode,
14441 gen_rtx_MEM (SImode,
14442 gen_rtx_PRE_DEC (Pmode,
14443 stack_pointer_rtx)),
14446 gen_rtx_SET (VOIDmode,
14447 gen_rtx_MEM (SImode,
14448 gen_rtx_PRE_DEC (Pmode,
14449 stack_pointer_rtx)),
14454 /* It is better to store HImodes as SImodes. */
14455 if (!TARGET_PARTIAL_REG_STALL)
14456 operand = gen_lowpart (SImode, operand);
14460 gen_rtx_SET (VOIDmode,
14461 gen_rtx_MEM (GET_MODE (operand),
14462 gen_rtx_PRE_DEC (SImode,
14463 stack_pointer_rtx)),
14469 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14474 /* Free operand from the memory. */
14476 ix86_free_from_memory (enum machine_mode mode)
14478 if (!TARGET_RED_ZONE)
14482 if (mode == DImode || TARGET_64BIT)
14484 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14488 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14489 to pop or add instruction if registers are available. */
14490 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14491 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14496 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14497 QImode must go into class Q_REGS.
14498 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14499 movdf to do mem-to-mem moves through integer regs. */
14501 ix86_preferred_reload_class (rtx x, enum reg_class class)
14503 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14505 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14507 /* SSE can't load any constant directly yet. */
14508 if (SSE_CLASS_P (class))
14510 /* Floats can load 0 and 1. */
14511 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14513 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14514 if (MAYBE_SSE_CLASS_P (class))
14515 return (reg_class_subset_p (class, GENERAL_REGS)
14516 ? GENERAL_REGS : FLOAT_REGS);
14520 /* General regs can load everything. */
14521 if (reg_class_subset_p (class, GENERAL_REGS))
14522 return GENERAL_REGS;
14523 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14524 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14527 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14529 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14534 /* If we are copying between general and FP registers, we need a memory
14535 location. The same is true for SSE and MMX registers.
14537 The macro can't work reliably when one of the CLASSES is class containing
14538 registers from multiple units (SSE, MMX, integer). We avoid this by never
14539 combining those units in single alternative in the machine description.
14540 Ensure that this constraint holds to avoid unexpected surprises.
14542 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14543 enforce these sanity checks. */
14545 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14546 enum machine_mode mode, int strict)
14548 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14549 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14550 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14551 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14552 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14553 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14560 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14561 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14562 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14563 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14564 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14566 /* Return the cost of moving data from a register in class CLASS1 to
14567 one in class CLASS2.
14569 It is not required that the cost always equal 2 when FROM is the same as TO;
14570 on some machines it is expensive to move between registers if they are not
14571 general registers. */
14573 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14574 enum reg_class class2)
14576 /* In case we require secondary memory, compute cost of the store followed
14577 by load. In order to avoid bad register allocation choices, we need
14578 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14580 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14584 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14585 MEMORY_MOVE_COST (mode, class1, 1));
14586 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14587 MEMORY_MOVE_COST (mode, class2, 1));
14589 /* In case of copying from general_purpose_register we may emit multiple
14590 stores followed by single load causing memory size mismatch stall.
14591 Count this as arbitrarily high cost of 20. */
14592 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14595 /* In the case of FP/MMX moves, the registers actually overlap, and we
14596 have to switch modes in order to treat them differently. */
14597 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14598 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14604 /* Moves between SSE/MMX and integer unit are expensive. */
14605 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14606 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14607 return ix86_cost->mmxsse_to_integer;
14608 if (MAYBE_FLOAT_CLASS_P (class1))
14609 return ix86_cost->fp_move;
14610 if (MAYBE_SSE_CLASS_P (class1))
14611 return ix86_cost->sse_move;
14612 if (MAYBE_MMX_CLASS_P (class1))
14613 return ix86_cost->mmx_move;
14617 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14619 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14621 /* Flags and only flags can only hold CCmode values. */
14622 if (CC_REGNO_P (regno))
14623 return GET_MODE_CLASS (mode) == MODE_CC;
14624 if (GET_MODE_CLASS (mode) == MODE_CC
14625 || GET_MODE_CLASS (mode) == MODE_RANDOM
14626 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14628 if (FP_REGNO_P (regno))
14629 return VALID_FP_MODE_P (mode);
14630 if (SSE_REGNO_P (regno))
14631 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14632 if (MMX_REGNO_P (regno))
14634 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14635 /* We handle both integer and floats in the general purpose registers.
14636 In future we should be able to handle vector modes as well. */
14637 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14639 /* Take care for QImode values - they can be in non-QI regs, but then
14640 they do cause partial register stalls. */
14641 if (regno < 4 || mode != QImode || TARGET_64BIT)
14643 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14646 /* Return the cost of moving data of mode M between a
14647 register and memory. A value of 2 is the default; this cost is
14648 relative to those in `REGISTER_MOVE_COST'.
14650 If moving between registers and memory is more expensive than
14651 between two registers, you should define this macro to express the
14654 Model also increased moving costs of QImode registers in non
14658 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14660 if (FLOAT_CLASS_P (class))
14678 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14680 if (SSE_CLASS_P (class))
14683 switch (GET_MODE_SIZE (mode))
14697 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14699 if (MMX_CLASS_P (class))
14702 switch (GET_MODE_SIZE (mode))
14713 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14715 switch (GET_MODE_SIZE (mode))
14719 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14720 : ix86_cost->movzbl_load);
14722 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14723 : ix86_cost->int_store[0] + 4);
14726 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14728 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14729 if (mode == TFmode)
14731 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14732 * (((int) GET_MODE_SIZE (mode)
14733 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14737 /* Compute a (partial) cost for rtx X. Return true if the complete
14738 cost has been computed, and false if subexpressions should be
14739 scanned. In either case, *TOTAL contains the cost result. */
14742 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14744 enum machine_mode mode = GET_MODE (x);
14752 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14754 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14756 else if (flag_pic && SYMBOLIC_CONST (x)
14758 || (!GET_CODE (x) != LABEL_REF
14759 && (GET_CODE (x) != SYMBOL_REF
14760 || !SYMBOL_REF_LOCAL_P (x)))))
14767 if (mode == VOIDmode)
14770 switch (standard_80387_constant_p (x))
14775 default: /* Other constants */
14780 /* Start with (MEM (SYMBOL_REF)), since that's where
14781 it'll probably end up. Add a penalty for size. */
14782 *total = (COSTS_N_INSNS (1)
14783 + (flag_pic != 0 && !TARGET_64BIT)
14784 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14790 /* The zero extensions is often completely free on x86_64, so make
14791 it as cheap as possible. */
14792 if (TARGET_64BIT && mode == DImode
14793 && GET_MODE (XEXP (x, 0)) == SImode)
14795 else if (TARGET_ZERO_EXTEND_WITH_AND)
14796 *total = COSTS_N_INSNS (ix86_cost->add);
14798 *total = COSTS_N_INSNS (ix86_cost->movzx);
14802 *total = COSTS_N_INSNS (ix86_cost->movsx);
14806 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14807 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14809 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14812 *total = COSTS_N_INSNS (ix86_cost->add);
14815 if ((value == 2 || value == 3)
14816 && !TARGET_DECOMPOSE_LEA
14817 && ix86_cost->lea <= ix86_cost->shift_const)
14819 *total = COSTS_N_INSNS (ix86_cost->lea);
14829 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14831 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14833 if (INTVAL (XEXP (x, 1)) > 32)
14834 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14836 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14840 if (GET_CODE (XEXP (x, 1)) == AND)
14841 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14843 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14848 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14849 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14851 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14856 if (FLOAT_MODE_P (mode))
14857 *total = COSTS_N_INSNS (ix86_cost->fmul);
14858 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14860 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14863 for (nbits = 0; value != 0; value >>= 1)
14866 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14867 + nbits * ix86_cost->mult_bit);
14871 /* This is arbitrary */
14872 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14873 + 7 * ix86_cost->mult_bit);
14881 if (FLOAT_MODE_P (mode))
14882 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14884 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14888 if (FLOAT_MODE_P (mode))
14889 *total = COSTS_N_INSNS (ix86_cost->fadd);
14890 else if (!TARGET_DECOMPOSE_LEA
14891 && GET_MODE_CLASS (mode) == MODE_INT
14892 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14894 if (GET_CODE (XEXP (x, 0)) == PLUS
14895 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14896 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14897 && CONSTANT_P (XEXP (x, 1)))
14899 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14900 if (val == 2 || val == 4 || val == 8)
14902 *total = COSTS_N_INSNS (ix86_cost->lea);
14903 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14904 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14906 *total += rtx_cost (XEXP (x, 1), outer_code);
14910 else if (GET_CODE (XEXP (x, 0)) == MULT
14911 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14913 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14914 if (val == 2 || val == 4 || val == 8)
14916 *total = COSTS_N_INSNS (ix86_cost->lea);
14917 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14918 *total += rtx_cost (XEXP (x, 1), outer_code);
14922 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14924 *total = COSTS_N_INSNS (ix86_cost->lea);
14925 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14926 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14927 *total += rtx_cost (XEXP (x, 1), outer_code);
14934 if (FLOAT_MODE_P (mode))
14936 *total = COSTS_N_INSNS (ix86_cost->fadd);
14944 if (!TARGET_64BIT && mode == DImode)
14946 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14947 + (rtx_cost (XEXP (x, 0), outer_code)
14948 << (GET_MODE (XEXP (x, 0)) != DImode))
14949 + (rtx_cost (XEXP (x, 1), outer_code)
14950 << (GET_MODE (XEXP (x, 1)) != DImode)));
14956 if (FLOAT_MODE_P (mode))
14958 *total = COSTS_N_INSNS (ix86_cost->fchs);
14964 if (!TARGET_64BIT && mode == DImode)
14965 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14967 *total = COSTS_N_INSNS (ix86_cost->add);
14971 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14976 if (FLOAT_MODE_P (mode))
14977 *total = COSTS_N_INSNS (ix86_cost->fabs);
14981 if (FLOAT_MODE_P (mode))
14982 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14986 if (XINT (x, 1) == UNSPEC_TP)
14995 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14997 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15000 fputs ("\tpushl $", asm_out_file);
15001 assemble_name (asm_out_file, XSTR (symbol, 0));
15002 fputc ('\n', asm_out_file);
15008 static int current_machopic_label_num;
15010 /* Given a symbol name and its associated stub, write out the
15011 definition of the stub. */
15014 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15016 unsigned int length;
15017 char *binder_name, *symbol_name, lazy_ptr_name[32];
15018 int label = ++current_machopic_label_num;
15020 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15021 symb = (*targetm.strip_name_encoding) (symb);
15023 length = strlen (stub);
15024 binder_name = alloca (length + 32);
15025 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15027 length = strlen (symb);
15028 symbol_name = alloca (length + 32);
15029 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15031 sprintf (lazy_ptr_name, "L%d$lz", label);
15034 machopic_picsymbol_stub_section ();
15036 machopic_symbol_stub_section ();
15038 fprintf (file, "%s:\n", stub);
15039 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15043 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15044 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15045 fprintf (file, "\tjmp %%edx\n");
15048 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15050 fprintf (file, "%s:\n", binder_name);
15054 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15055 fprintf (file, "\tpushl %%eax\n");
15058 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15060 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15062 machopic_lazy_symbol_ptr_section ();
15063 fprintf (file, "%s:\n", lazy_ptr_name);
15064 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15065 fprintf (file, "\t.long %s\n", binder_name);
15067 #endif /* TARGET_MACHO */
15069 /* Order the registers for register allocator. */
15072 x86_order_regs_for_local_alloc (void)
15077 /* First allocate the local general purpose registers. */
15078 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15079 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15080 reg_alloc_order [pos++] = i;
15082 /* Global general purpose registers. */
15083 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15084 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15085 reg_alloc_order [pos++] = i;
15087 /* x87 registers come first in case we are doing FP math
15089 if (!TARGET_SSE_MATH)
15090 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15091 reg_alloc_order [pos++] = i;
15093 /* SSE registers. */
15094 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15095 reg_alloc_order [pos++] = i;
15096 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15097 reg_alloc_order [pos++] = i;
15099 /* x87 registers. */
15100 if (TARGET_SSE_MATH)
15101 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15102 reg_alloc_order [pos++] = i;
15104 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15105 reg_alloc_order [pos++] = i;
15107 /* Initialize the rest of array as we do not allocate some registers
15109 while (pos < FIRST_PSEUDO_REGISTER)
15110 reg_alloc_order [pos++] = 0;
15113 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15114 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15117 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15118 struct attribute_spec.handler. */
15120 ix86_handle_struct_attribute (tree *node, tree name,
15121 tree args ATTRIBUTE_UNUSED,
15122 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15125 if (DECL_P (*node))
15127 if (TREE_CODE (*node) == TYPE_DECL)
15128 type = &TREE_TYPE (*node);
15133 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15134 || TREE_CODE (*type) == UNION_TYPE)))
15136 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15137 *no_add_attrs = true;
15140 else if ((is_attribute_p ("ms_struct", name)
15141 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15142 || ((is_attribute_p ("gcc_struct", name)
15143 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15145 warning ("`%s' incompatible attribute ignored",
15146 IDENTIFIER_POINTER (name));
15147 *no_add_attrs = true;
15154 ix86_ms_bitfield_layout_p (tree record_type)
15156 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15157 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15158 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15161 /* Returns an expression indicating where the this parameter is
15162 located on entry to the FUNCTION. */
15165 x86_this_parameter (tree function)
15167 tree type = TREE_TYPE (function);
15171 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15172 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15175 if (ix86_function_regparm (type, function) > 0)
15179 parm = TYPE_ARG_TYPES (type);
15180 /* Figure out whether or not the function has a variable number of
15182 for (; parm; parm = TREE_CHAIN (parm))
15183 if (TREE_VALUE (parm) == void_type_node)
15185 /* If not, the this parameter is in the first argument. */
15189 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15191 return gen_rtx_REG (SImode, 0);
15195 if (aggregate_value_p (TREE_TYPE (type), type))
15196 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15198 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15201 /* Determine whether x86_output_mi_thunk can succeed. */
15204 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15205 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15206 HOST_WIDE_INT vcall_offset, tree function)
15208 /* 64-bit can handle anything. */
15212 /* For 32-bit, everything's fine if we have one free register. */
15213 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15216 /* Need a free register for vcall_offset. */
15220 /* Need a free register for GOT references. */
15221 if (flag_pic && !(*targetm.binds_local_p) (function))
15224 /* Otherwise ok. */
15228 /* Output the assembler code for a thunk function. THUNK_DECL is the
15229 declaration for the thunk function itself, FUNCTION is the decl for
15230 the target function. DELTA is an immediate constant offset to be
15231 added to THIS. If VCALL_OFFSET is nonzero, the word at
15232 *(*this + vcall_offset) should be added to THIS. */
15235 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15236 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15237 HOST_WIDE_INT vcall_offset, tree function)
15240 rtx this = x86_this_parameter (function);
15243 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15244 pull it in now and let DELTA benefit. */
15247 else if (vcall_offset)
15249 /* Put the this parameter into %eax. */
15251 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15252 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15255 this_reg = NULL_RTX;
15257 /* Adjust the this parameter by a fixed constant. */
15260 xops[0] = GEN_INT (delta);
15261 xops[1] = this_reg ? this_reg : this;
15264 if (!x86_64_general_operand (xops[0], DImode))
15266 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15268 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15272 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15275 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15278 /* Adjust the this parameter by a value stored in the vtable. */
15282 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15285 int tmp_regno = 2 /* ECX */;
15286 if (lookup_attribute ("fastcall",
15287 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15288 tmp_regno = 0 /* EAX */;
15289 tmp = gen_rtx_REG (SImode, tmp_regno);
15292 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15295 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15297 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15299 /* Adjust the this parameter. */
15300 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15301 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15303 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15304 xops[0] = GEN_INT (vcall_offset);
15306 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15307 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15309 xops[1] = this_reg;
15311 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15313 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15316 /* If necessary, drop THIS back to its stack slot. */
15317 if (this_reg && this_reg != this)
15319 xops[0] = this_reg;
15321 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15324 xops[0] = XEXP (DECL_RTL (function), 0);
15327 if (!flag_pic || (*targetm.binds_local_p) (function))
15328 output_asm_insn ("jmp\t%P0", xops);
15331 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15332 tmp = gen_rtx_CONST (Pmode, tmp);
15333 tmp = gen_rtx_MEM (QImode, tmp);
15335 output_asm_insn ("jmp\t%A0", xops);
15340 if (!flag_pic || (*targetm.binds_local_p) (function))
15341 output_asm_insn ("jmp\t%P0", xops);
15346 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15347 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15348 tmp = gen_rtx_MEM (QImode, tmp);
15350 output_asm_insn ("jmp\t%0", xops);
15353 #endif /* TARGET_MACHO */
15355 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15356 output_set_got (tmp);
15359 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15360 output_asm_insn ("jmp\t{*}%1", xops);
15366 x86_file_start (void)
15368 default_file_start ();
15369 if (X86_FILE_START_VERSION_DIRECTIVE)
15370 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15371 if (X86_FILE_START_FLTUSED)
15372 fputs ("\t.global\t__fltused\n", asm_out_file);
15373 if (ix86_asm_dialect == ASM_INTEL)
15374 fputs ("\t.intel_syntax\n", asm_out_file);
15378 x86_field_alignment (tree field, int computed)
15380 enum machine_mode mode;
15381 tree type = TREE_TYPE (field);
15383 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15385 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15386 ? get_inner_array_type (type) : type);
15387 if (mode == DFmode || mode == DCmode
15388 || GET_MODE_CLASS (mode) == MODE_INT
15389 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15390 return MIN (32, computed);
15394 /* Output assembler code to FILE to increment profiler label # LABELNO
15395 for profiling a function entry. */
15397 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15402 #ifndef NO_PROFILE_COUNTERS
15403 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15405 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15409 #ifndef NO_PROFILE_COUNTERS
15410 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15412 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15416 #ifndef NO_PROFILE_COUNTERS
15417 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15418 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15420 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15424 #ifndef NO_PROFILE_COUNTERS
15425 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15426 PROFILE_COUNT_REGISTER);
15428 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15432 /* We don't have exact information about the insn sizes, but we may assume
15433 quite safely that we are informed about all 1 byte insns and memory
15434 address sizes. This is enough to eliminate unnecessary padding in
15438 min_insn_size (rtx insn)
15442 if (!INSN_P (insn) || !active_insn_p (insn))
15445 /* Discard alignments we've emit and jump instructions. */
15446 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15447 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15449 if (GET_CODE (insn) == JUMP_INSN
15450 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15451 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15454 /* Important case - calls are always 5 bytes.
15455 It is common to have many calls in the row. */
15456 if (GET_CODE (insn) == CALL_INSN
15457 && symbolic_reference_mentioned_p (PATTERN (insn))
15458 && !SIBLING_CALL_P (insn))
15460 if (get_attr_length (insn) <= 1)
15463 /* For normal instructions we may rely on the sizes of addresses
15464 and the presence of symbol to require 4 bytes of encoding.
15465 This is not the case for jumps where references are PC relative. */
15466 if (GET_CODE (insn) != JUMP_INSN)
15468 l = get_attr_length_address (insn);
15469 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15478 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15482 k8_avoid_jump_misspredicts (void)
15484 rtx insn, start = get_insns ();
15485 int nbytes = 0, njumps = 0;
15488 /* Look for all minimal intervals of instructions containing 4 jumps.
15489 The intervals are bounded by START and INSN. NBYTES is the total
15490 size of instructions in the interval including INSN and not including
15491 START. When the NBYTES is smaller than 16 bytes, it is possible
15492 that the end of START and INSN ends up in the same 16byte page.
15494 The smallest offset in the page INSN can start is the case where START
15495 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15496 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15498 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15501 nbytes += min_insn_size (insn);
15503 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15504 INSN_UID (insn), min_insn_size (insn));
15505 if ((GET_CODE (insn) == JUMP_INSN
15506 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15507 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15508 || GET_CODE (insn) == CALL_INSN)
15515 start = NEXT_INSN (start);
15516 if ((GET_CODE (start) == JUMP_INSN
15517 && GET_CODE (PATTERN (start)) != ADDR_VEC
15518 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15519 || GET_CODE (start) == CALL_INSN)
15520 njumps--, isjump = 1;
15523 nbytes -= min_insn_size (start);
15528 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15529 INSN_UID (start), INSN_UID (insn), nbytes);
15531 if (njumps == 3 && isjump && nbytes < 16)
15533 int padsize = 15 - nbytes + min_insn_size (insn);
15536 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15537 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15542 /* Implement machine specific optimizations.
15543 At the moment we implement single transformation: AMD Athlon works faster
15544 when RET is not destination of conditional jump or directly preceded
15545 by other jump instruction. We avoid the penalty by inserting NOP just
15546 before the RET instructions in such cases. */
15552 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15554 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15556 basic_block bb = e->src;
15559 bool replace = false;
15561 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15562 || !maybe_hot_bb_p (bb))
15564 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15565 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15567 if (prev && GET_CODE (prev) == CODE_LABEL)
15570 for (e = bb->pred; e; e = e->pred_next)
15571 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15572 && !(e->flags & EDGE_FALLTHRU))
15577 prev = prev_active_insn (ret);
15579 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15580 || GET_CODE (prev) == CALL_INSN))
15582 /* Empty functions get branch mispredict even when the jump destination
15583 is not visible to us. */
15584 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15589 emit_insn_before (gen_return_internal_long (), ret);
15593 k8_avoid_jump_misspredicts ();
15596 /* Return nonzero when QImode register that must be represented via REX prefix
15599 x86_extended_QIreg_mentioned_p (rtx insn)
15602 extract_insn_cached (insn);
15603 for (i = 0; i < recog_data.n_operands; i++)
15604 if (REG_P (recog_data.operand[i])
15605 && REGNO (recog_data.operand[i]) >= 4)
15610 /* Return nonzero when P points to register encoded via REX prefix.
15611 Called via for_each_rtx. */
15613 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15615 unsigned int regno;
15618 regno = REGNO (*p);
15619 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15622 /* Return true when INSN mentions register that must be encoded using REX
15625 x86_extended_reg_mentioned_p (rtx insn)
15627 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15630 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15631 optabs would emit if we didn't have TFmode patterns. */
15634 x86_emit_floatuns (rtx operands[2])
15636 rtx neglab, donelab, i0, i1, f0, in, out;
15637 enum machine_mode mode, inmode;
15639 inmode = GET_MODE (operands[1]);
15640 if (inmode != SImode
15641 && inmode != DImode)
15645 in = force_reg (inmode, operands[1]);
15646 mode = GET_MODE (out);
15647 neglab = gen_label_rtx ();
15648 donelab = gen_label_rtx ();
15649 i1 = gen_reg_rtx (Pmode);
15650 f0 = gen_reg_rtx (mode);
15652 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15654 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15655 emit_jump_insn (gen_jump (donelab));
15658 emit_label (neglab);
15660 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15661 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15662 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15663 expand_float (f0, i0, 0);
15664 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15666 emit_label (donelab);
15669 /* Return if we do not know how to pass TYPE solely in registers. */
15671 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15673 if (default_must_pass_in_stack (mode, type))
15675 return (!TARGET_64BIT && type && mode == TImode);
15678 #include "gt-i386.h"