1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 /* The encoding characters for the four TLS models present in ELF. */
673 static char const tls_model_chars[] = " GLil";
675 #define MAX_386_STACK_LOCALS 3
676 /* Size of the register save area. */
677 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
679 /* Define the structure for the machine field in struct function. */
681 struct stack_local_entry GTY(())
686 struct stack_local_entry *next;
689 /* Structure describing stack frame layout.
690 Stack grows downward:
696 saved frame pointer if frame_pointer_needed
697 <- HARD_FRAME_POINTER
703 > to_allocate <- FRAME_POINTER
715 int outgoing_arguments_size;
718 HOST_WIDE_INT to_allocate;
719 /* The offsets relative to ARG_POINTER. */
720 HOST_WIDE_INT frame_pointer_offset;
721 HOST_WIDE_INT hard_frame_pointer_offset;
722 HOST_WIDE_INT stack_pointer_offset;
724 /* When save_regs_using_mov is set, emit prologue using
725 move instead of push instructions. */
726 bool save_regs_using_mov;
729 /* Used to enable/disable debugging features. */
730 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
731 /* Code model option as passed by user. */
732 const char *ix86_cmodel_string;
734 enum cmodel ix86_cmodel;
736 const char *ix86_asm_string;
737 enum asm_dialect ix86_asm_dialect = ASM_ATT;
739 const char *ix86_tls_dialect_string;
740 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
742 /* Which unit we are generating floating point math for. */
743 enum fpmath_unit ix86_fpmath;
745 /* Which cpu are we scheduling for. */
746 enum processor_type ix86_tune;
747 /* Which instruction set architecture to use. */
748 enum processor_type ix86_arch;
750 /* Strings to hold which cpu and instruction set architecture to use. */
751 const char *ix86_tune_string; /* for -mtune=<xxx> */
752 const char *ix86_arch_string; /* for -march=<xxx> */
753 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
755 /* # of registers to use to pass arguments. */
756 const char *ix86_regparm_string;
758 /* true if sse prefetch instruction is not NOOP. */
759 int x86_prefetch_sse;
761 /* ix86_regparm_string as a number */
764 /* Alignment to use for loops and jumps: */
766 /* Power of two alignment for loops. */
767 const char *ix86_align_loops_string;
769 /* Power of two alignment for non-loop jumps. */
770 const char *ix86_align_jumps_string;
772 /* Power of two alignment for stack boundary in bytes. */
773 const char *ix86_preferred_stack_boundary_string;
775 /* Preferred alignment for stack boundary in bits. */
776 int ix86_preferred_stack_boundary;
778 /* Values 1-5: see jump.c */
779 int ix86_branch_cost;
780 const char *ix86_branch_cost_string;
782 /* Power of two alignment for functions. */
783 const char *ix86_align_funcs_string;
785 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
786 static char internal_label_prefix[16];
787 static int internal_label_prefix_len;
789 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
790 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
791 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
792 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
794 static const char *get_some_local_dynamic_name PARAMS ((void));
795 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
796 static rtx maybe_get_pool_constant PARAMS ((rtx));
797 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
798 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
800 static rtx get_thread_pointer PARAMS ((void));
801 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
802 static rtx gen_push PARAMS ((rtx));
803 static int memory_address_length PARAMS ((rtx addr));
804 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
806 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
807 static void ix86_dump_ppro_packet PARAMS ((FILE *));
808 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
809 static struct machine_function * ix86_init_machine_status PARAMS ((void));
810 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
811 static int ix86_nsaved_regs PARAMS ((void));
812 static void ix86_emit_save_regs PARAMS ((void));
813 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
814 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
815 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
816 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
817 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
818 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
819 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
820 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
821 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
822 static int ix86_issue_rate PARAMS ((void));
823 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
824 static void ix86_sched_init PARAMS ((FILE *, int, int));
825 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
826 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
827 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
828 static int ia32_multipass_dfa_lookahead PARAMS ((void));
829 static void ix86_init_mmx_sse_builtins PARAMS ((void));
830 static rtx x86_this_parameter PARAMS ((tree));
831 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
832 HOST_WIDE_INT, tree));
833 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
834 HOST_WIDE_INT, tree));
835 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
839 rtx base, index, disp;
843 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
844 static int ix86_address_cost PARAMS ((rtx));
845 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static rtx ix86_delegitimize_address PARAMS ((rtx));
848 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
849 static const char *ix86_strip_name_encoding PARAMS ((const char *))
852 struct builtin_description;
853 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
855 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
857 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
858 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
859 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
860 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
861 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
862 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
863 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
867 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
869 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
870 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
871 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
872 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
873 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
874 static int ix86_save_reg PARAMS ((unsigned int, int));
875 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
876 static int ix86_comp_type_attributes PARAMS ((tree, tree));
877 static int ix86_fntype_regparm PARAMS ((tree));
878 const struct attribute_spec ix86_attribute_table[];
879 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
880 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
881 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int ix86_value_regno PARAMS ((enum machine_mode));
883 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
884 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
885 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
886 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
887 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
889 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
890 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
893 /* Register class used for passing given 64bit part of the argument.
894 These represent classes as documented by the PS ABI, with the exception
895 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
896 use SF or DFmode move instead of DImode to avoid reformatting penalties.
898 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
899 whenever possible (upper half does contain padding).
901 enum x86_64_reg_class
904 X86_64_INTEGER_CLASS,
905 X86_64_INTEGERSI_CLASS,
914 static const char * const x86_64_reg_class_name[] =
915 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
917 #define MAX_CLASSES 4
918 static int classify_argument PARAMS ((enum machine_mode, tree,
919 enum x86_64_reg_class [MAX_CLASSES],
921 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
923 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
925 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
926 enum x86_64_reg_class));
928 /* Table of constants used by fldpi, fldln2, etc... */
929 static REAL_VALUE_TYPE ext_80387_constants_table [5];
930 static bool ext_80387_constants_init = 0;
931 static void init_ext_80387_constants PARAMS ((void));
933 /* Initialize the GCC target structure. */
934 #undef TARGET_ATTRIBUTE_TABLE
935 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
936 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
937 # undef TARGET_MERGE_DECL_ATTRIBUTES
938 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
941 #undef TARGET_COMP_TYPE_ATTRIBUTES
942 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
944 #undef TARGET_INIT_BUILTINS
945 #define TARGET_INIT_BUILTINS ix86_init_builtins
947 #undef TARGET_EXPAND_BUILTIN
948 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
950 #undef TARGET_ASM_FUNCTION_EPILOGUE
951 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
953 #undef TARGET_ASM_OPEN_PAREN
954 #define TARGET_ASM_OPEN_PAREN ""
955 #undef TARGET_ASM_CLOSE_PAREN
956 #define TARGET_ASM_CLOSE_PAREN ""
958 #undef TARGET_ASM_ALIGNED_HI_OP
959 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
960 #undef TARGET_ASM_ALIGNED_SI_OP
961 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
963 #undef TARGET_ASM_ALIGNED_DI_OP
964 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
967 #undef TARGET_ASM_UNALIGNED_HI_OP
968 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
969 #undef TARGET_ASM_UNALIGNED_SI_OP
970 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
971 #undef TARGET_ASM_UNALIGNED_DI_OP
972 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
974 #undef TARGET_SCHED_ADJUST_COST
975 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
976 #undef TARGET_SCHED_ISSUE_RATE
977 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
978 #undef TARGET_SCHED_VARIABLE_ISSUE
979 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
980 #undef TARGET_SCHED_INIT
981 #define TARGET_SCHED_INIT ix86_sched_init
982 #undef TARGET_SCHED_REORDER
983 #define TARGET_SCHED_REORDER ix86_sched_reorder
984 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
985 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
986 ia32_use_dfa_pipeline_interface
987 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
988 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
989 ia32_multipass_dfa_lookahead
991 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
992 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
995 #undef TARGET_HAVE_TLS
996 #define TARGET_HAVE_TLS true
998 #undef TARGET_CANNOT_FORCE_CONST_MEM
999 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1001 #undef TARGET_DELEGITIMIZE_ADDRESS
1002 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1004 #undef TARGET_MS_BITFIELD_LAYOUT_P
1005 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1007 #undef TARGET_ASM_OUTPUT_MI_THUNK
1008 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1009 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1010 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1012 #undef TARGET_RTX_COSTS
1013 #define TARGET_RTX_COSTS ix86_rtx_costs
1014 #undef TARGET_ADDRESS_COST
1015 #define TARGET_ADDRESS_COST ix86_address_cost
1017 struct gcc_target targetm = TARGET_INITIALIZER;
1019 /* The svr4 ABI for the i386 says that records and unions are returned
1021 #ifndef DEFAULT_PCC_STRUCT_RETURN
1022 #define DEFAULT_PCC_STRUCT_RETURN 1
1025 /* Sometimes certain combinations of command options do not make
1026 sense on a particular target machine. You can define a macro
1027 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1028 defined, is executed once just after all the command options have
1031 Don't use this macro to turn on various extra optimizations for
1032 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1038 /* Comes from final.c -- no real reason to change it. */
1039 #define MAX_CODE_ALIGN 16
1043 const struct processor_costs *cost; /* Processor costs */
1044 const int target_enable; /* Target flags to enable. */
1045 const int target_disable; /* Target flags to disable. */
1046 const int align_loop; /* Default alignments. */
1047 const int align_loop_max_skip;
1048 const int align_jump;
1049 const int align_jump_max_skip;
1050 const int align_func;
1052 const processor_target_table[PROCESSOR_max] =
1054 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1055 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1056 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1057 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1058 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1059 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1060 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1061 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1064 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1067 const char *const name; /* processor name or nickname. */
1068 const enum processor_type processor;
1069 const enum pta_flags
1074 PTA_PREFETCH_SSE = 8,
1080 const processor_alias_table[] =
1082 {"i386", PROCESSOR_I386, 0},
1083 {"i486", PROCESSOR_I486, 0},
1084 {"i586", PROCESSOR_PENTIUM, 0},
1085 {"pentium", PROCESSOR_PENTIUM, 0},
1086 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1087 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1088 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1089 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1090 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1091 {"i686", PROCESSOR_PENTIUMPRO, 0},
1092 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1093 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1094 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1095 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1096 PTA_MMX | PTA_PREFETCH_SSE},
1097 {"k6", PROCESSOR_K6, PTA_MMX},
1098 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1099 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1100 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1102 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1103 | PTA_3DNOW | PTA_3DNOW_A},
1104 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A | PTA_SSE},
1106 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1111 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1114 int const pta_size = ARRAY_SIZE (processor_alias_table);
1116 /* By default our XFmode is the 80-bit extended format. If we have
1117 use TFmode instead, it's also the 80-bit format, but with padding. */
1118 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1119 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1121 /* Set the default values for switches whose default depends on TARGET_64BIT
1122 in case they weren't overwritten by command line options. */
1125 if (flag_omit_frame_pointer == 2)
1126 flag_omit_frame_pointer = 1;
1127 if (flag_asynchronous_unwind_tables == 2)
1128 flag_asynchronous_unwind_tables = 1;
1129 if (flag_pcc_struct_return == 2)
1130 flag_pcc_struct_return = 0;
1134 if (flag_omit_frame_pointer == 2)
1135 flag_omit_frame_pointer = 0;
1136 if (flag_asynchronous_unwind_tables == 2)
1137 flag_asynchronous_unwind_tables = 0;
1138 if (flag_pcc_struct_return == 2)
1139 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1142 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1143 SUBTARGET_OVERRIDE_OPTIONS;
1146 if (!ix86_tune_string && ix86_arch_string)
1147 ix86_tune_string = ix86_arch_string;
1148 if (!ix86_tune_string)
1149 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1150 if (!ix86_arch_string)
1151 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1153 if (ix86_cmodel_string != 0)
1155 if (!strcmp (ix86_cmodel_string, "small"))
1156 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1158 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1159 else if (!strcmp (ix86_cmodel_string, "32"))
1160 ix86_cmodel = CM_32;
1161 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1162 ix86_cmodel = CM_KERNEL;
1163 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1164 ix86_cmodel = CM_MEDIUM;
1165 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1166 ix86_cmodel = CM_LARGE;
1168 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1172 ix86_cmodel = CM_32;
1174 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1176 if (ix86_asm_string != 0)
1178 if (!strcmp (ix86_asm_string, "intel"))
1179 ix86_asm_dialect = ASM_INTEL;
1180 else if (!strcmp (ix86_asm_string, "att"))
1181 ix86_asm_dialect = ASM_ATT;
1183 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1185 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1186 error ("code model `%s' not supported in the %s bit mode",
1187 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1188 if (ix86_cmodel == CM_LARGE)
1189 sorry ("code model `large' not supported yet");
1190 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1191 sorry ("%i-bit mode not compiled in",
1192 (target_flags & MASK_64BIT) ? 64 : 32);
1194 for (i = 0; i < pta_size; i++)
1195 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1197 ix86_arch = processor_alias_table[i].processor;
1198 /* Default cpu tuning to the architecture. */
1199 ix86_tune = ix86_arch;
1200 if (processor_alias_table[i].flags & PTA_MMX
1201 && !(target_flags_explicit & MASK_MMX))
1202 target_flags |= MASK_MMX;
1203 if (processor_alias_table[i].flags & PTA_3DNOW
1204 && !(target_flags_explicit & MASK_3DNOW))
1205 target_flags |= MASK_3DNOW;
1206 if (processor_alias_table[i].flags & PTA_3DNOW_A
1207 && !(target_flags_explicit & MASK_3DNOW_A))
1208 target_flags |= MASK_3DNOW_A;
1209 if (processor_alias_table[i].flags & PTA_SSE
1210 && !(target_flags_explicit & MASK_SSE))
1211 target_flags |= MASK_SSE;
1212 if (processor_alias_table[i].flags & PTA_SSE2
1213 && !(target_flags_explicit & MASK_SSE2))
1214 target_flags |= MASK_SSE2;
1215 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1216 x86_prefetch_sse = true;
1217 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1218 error ("CPU you selected does not support x86-64 instruction set");
1223 error ("bad value (%s) for -march= switch", ix86_arch_string);
1225 for (i = 0; i < pta_size; i++)
1226 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1228 ix86_tune = processor_alias_table[i].processor;
1229 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1230 error ("CPU you selected does not support x86-64 instruction set");
1233 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1234 x86_prefetch_sse = true;
1236 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1239 ix86_cost = &size_cost;
1241 ix86_cost = processor_target_table[ix86_tune].cost;
1242 target_flags |= processor_target_table[ix86_tune].target_enable;
1243 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1245 /* Arrange to set up i386_stack_locals for all functions. */
1246 init_machine_status = ix86_init_machine_status;
1248 /* Validate -mregparm= value. */
1249 if (ix86_regparm_string)
1251 i = atoi (ix86_regparm_string);
1252 if (i < 0 || i > REGPARM_MAX)
1253 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1259 ix86_regparm = REGPARM_MAX;
1261 /* If the user has provided any of the -malign-* options,
1262 warn and use that value only if -falign-* is not set.
1263 Remove this code in GCC 3.2 or later. */
1264 if (ix86_align_loops_string)
1266 warning ("-malign-loops is obsolete, use -falign-loops");
1267 if (align_loops == 0)
1269 i = atoi (ix86_align_loops_string);
1270 if (i < 0 || i > MAX_CODE_ALIGN)
1271 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1273 align_loops = 1 << i;
1277 if (ix86_align_jumps_string)
1279 warning ("-malign-jumps is obsolete, use -falign-jumps");
1280 if (align_jumps == 0)
1282 i = atoi (ix86_align_jumps_string);
1283 if (i < 0 || i > MAX_CODE_ALIGN)
1284 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1286 align_jumps = 1 << i;
1290 if (ix86_align_funcs_string)
1292 warning ("-malign-functions is obsolete, use -falign-functions");
1293 if (align_functions == 0)
1295 i = atoi (ix86_align_funcs_string);
1296 if (i < 0 || i > MAX_CODE_ALIGN)
1297 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1299 align_functions = 1 << i;
1303 /* Default align_* from the processor table. */
1304 if (align_loops == 0)
1306 align_loops = processor_target_table[ix86_tune].align_loop;
1307 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1309 if (align_jumps == 0)
1311 align_jumps = processor_target_table[ix86_tune].align_jump;
1312 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1314 if (align_functions == 0)
1316 align_functions = processor_target_table[ix86_tune].align_func;
1319 /* Validate -mpreferred-stack-boundary= value, or provide default.
1320 The default of 128 bits is for Pentium III's SSE __m128, but we
1321 don't want additional code to keep the stack aligned when
1322 optimizing for code size. */
1323 ix86_preferred_stack_boundary = (optimize_size
1324 ? TARGET_64BIT ? 128 : 32
1326 if (ix86_preferred_stack_boundary_string)
1328 i = atoi (ix86_preferred_stack_boundary_string);
1329 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1330 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1331 TARGET_64BIT ? 4 : 2);
1333 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1336 /* Validate -mbranch-cost= value, or provide default. */
1337 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1338 if (ix86_branch_cost_string)
1340 i = atoi (ix86_branch_cost_string);
1342 error ("-mbranch-cost=%d is not between 0 and 5", i);
1344 ix86_branch_cost = i;
1347 if (ix86_tls_dialect_string)
1349 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_GNU;
1351 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1352 ix86_tls_dialect = TLS_DIALECT_SUN;
1354 error ("bad value (%s) for -mtls-dialect= switch",
1355 ix86_tls_dialect_string);
1358 /* Keep nonleaf frame pointers. */
1359 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1360 flag_omit_frame_pointer = 1;
1362 /* If we're doing fast math, we don't care about comparison order
1363 wrt NaNs. This lets us use a shorter comparison sequence. */
1364 if (flag_unsafe_math_optimizations)
1365 target_flags &= ~MASK_IEEE_FP;
1367 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1368 since the insns won't need emulation. */
1369 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1370 target_flags &= ~MASK_NO_FANCY_MATH_387;
1374 if (TARGET_ALIGN_DOUBLE)
1375 error ("-malign-double makes no sense in the 64bit mode");
1377 error ("-mrtd calling convention not supported in the 64bit mode");
1378 /* Enable by default the SSE and MMX builtins. */
1379 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1380 ix86_fpmath = FPMATH_SSE;
1383 ix86_fpmath = FPMATH_387;
1385 if (ix86_fpmath_string != 0)
1387 if (! strcmp (ix86_fpmath_string, "387"))
1388 ix86_fpmath = FPMATH_387;
1389 else if (! strcmp (ix86_fpmath_string, "sse"))
1393 warning ("SSE instruction set disabled, using 387 arithmetics");
1394 ix86_fpmath = FPMATH_387;
1397 ix86_fpmath = FPMATH_SSE;
1399 else if (! strcmp (ix86_fpmath_string, "387,sse")
1400 || ! strcmp (ix86_fpmath_string, "sse,387"))
1404 warning ("SSE instruction set disabled, using 387 arithmetics");
1405 ix86_fpmath = FPMATH_387;
1407 else if (!TARGET_80387)
1409 warning ("387 instruction set disabled, using SSE arithmetics");
1410 ix86_fpmath = FPMATH_SSE;
1413 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1416 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1419 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1423 target_flags |= MASK_MMX;
1424 x86_prefetch_sse = true;
1427 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1430 target_flags |= MASK_MMX;
1431 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1432 extensions it adds. */
1433 if (x86_3dnow_a & (1 << ix86_arch))
1434 target_flags |= MASK_3DNOW_A;
1436 if ((x86_accumulate_outgoing_args & TUNEMASK)
1437 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1439 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1441 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1444 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1445 p = strchr (internal_label_prefix, 'X');
1446 internal_label_prefix_len = p - internal_label_prefix;
1452 optimization_options (level, size)
1454 int size ATTRIBUTE_UNUSED;
1456 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1457 make the problem with not enough registers even worse. */
1458 #ifdef INSN_SCHEDULING
1460 flag_schedule_insns = 0;
1463 /* The default values of these switches depend on the TARGET_64BIT
1464 that is not known at this moment. Mark these values with 2 and
1465 let user the to override these. In case there is no command line option
1466 specifying them, we will set the defaults in override_options. */
1468 flag_omit_frame_pointer = 2;
1469 flag_pcc_struct_return = 2;
1470 flag_asynchronous_unwind_tables = 2;
1473 /* Table of valid machine attributes. */
1474 const struct attribute_spec ix86_attribute_table[] =
1476 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1477 /* Stdcall attribute says callee is responsible for popping arguments
1478 if they are not variable. */
1479 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1480 /* Fastcall attribute says callee is responsible for popping arguments
1481 if they are not variable. */
1482 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1483 /* Cdecl attribute says the callee is a normal C declaration */
1484 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1485 /* Regparm attribute specifies how many integer arguments are to be
1486 passed in registers. */
1487 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1488 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1489 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1490 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1491 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1493 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1494 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1495 { NULL, 0, 0, false, false, false, NULL }
1498 /* Decide whether we can make a sibling call to a function. DECL is the
1499 declaration of the function being targeted by the call and EXP is the
1500 CALL_EXPR representing the call. */
1503 ix86_function_ok_for_sibcall (decl, exp)
1507 /* If we are generating position-independent code, we cannot sibcall
1508 optimize any indirect call, or a direct call to a global function,
1509 as the PLT requires %ebx be live. */
1510 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1513 /* If we are returning floats on the 80387 register stack, we cannot
1514 make a sibcall from a function that doesn't return a float to a
1515 function that does or, conversely, from a function that does return
1516 a float to a function that doesn't; the necessary stack adjustment
1517 would not be executed. */
1518 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1519 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1522 /* If this call is indirect, we'll need to be able to use a call-clobbered
1523 register for the address of the target function. Make sure that all
1524 such registers are not used for passing parameters. */
1525 if (!decl && !TARGET_64BIT)
1527 int regparm = ix86_regparm;
1530 /* We're looking at the CALL_EXPR, we need the type of the function. */
1531 type = TREE_OPERAND (exp, 0); /* pointer expression */
1532 type = TREE_TYPE (type); /* pointer type */
1533 type = TREE_TYPE (type); /* function type */
1535 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1537 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1541 /* ??? Need to count the actual number of registers to be used,
1542 not the possible number of registers. Fix later. */
1547 /* Otherwise okay. That also includes certain types of indirect calls. */
1551 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1552 arguments as in struct attribute_spec.handler. */
1554 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1557 tree args ATTRIBUTE_UNUSED;
1558 int flags ATTRIBUTE_UNUSED;
1561 if (TREE_CODE (*node) != FUNCTION_TYPE
1562 && TREE_CODE (*node) != METHOD_TYPE
1563 && TREE_CODE (*node) != FIELD_DECL
1564 && TREE_CODE (*node) != TYPE_DECL)
1566 warning ("`%s' attribute only applies to functions",
1567 IDENTIFIER_POINTER (name));
1568 *no_add_attrs = true;
1572 if (is_attribute_p ("fastcall", name))
1574 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1576 error ("fastcall and stdcall attributes are not compatible");
1578 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1580 error ("fastcall and regparm attributes are not compatible");
1583 else if (is_attribute_p ("stdcall", name))
1585 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1587 error ("fastcall and stdcall attributes are not compatible");
1594 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1595 *no_add_attrs = true;
1601 /* Handle a "regparm" attribute;
1602 arguments as in struct attribute_spec.handler. */
1604 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1608 int flags ATTRIBUTE_UNUSED;
1611 if (TREE_CODE (*node) != FUNCTION_TYPE
1612 && TREE_CODE (*node) != METHOD_TYPE
1613 && TREE_CODE (*node) != FIELD_DECL
1614 && TREE_CODE (*node) != TYPE_DECL)
1616 warning ("`%s' attribute only applies to functions",
1617 IDENTIFIER_POINTER (name));
1618 *no_add_attrs = true;
1624 cst = TREE_VALUE (args);
1625 if (TREE_CODE (cst) != INTEGER_CST)
1627 warning ("`%s' attribute requires an integer constant argument",
1628 IDENTIFIER_POINTER (name));
1629 *no_add_attrs = true;
1631 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1633 warning ("argument to `%s' attribute larger than %d",
1634 IDENTIFIER_POINTER (name), REGPARM_MAX);
1635 *no_add_attrs = true;
1638 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1640 error ("fastcall and regparm attributes are not compatible");
1647 /* Return 0 if the attributes for two types are incompatible, 1 if they
1648 are compatible, and 2 if they are nearly compatible (which causes a
1649 warning to be generated). */
1652 ix86_comp_type_attributes (type1, type2)
1656 /* Check for mismatch of non-default calling convention. */
1657 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1659 if (TREE_CODE (type1) != FUNCTION_TYPE)
1662 /* Check for mismatched fastcall types */
1663 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1664 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1667 /* Check for mismatched return types (cdecl vs stdcall). */
1668 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1669 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1674 /* Return the regparm value for a fuctio with the indicated TYPE. */
1677 ix86_fntype_regparm (type)
1682 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1684 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1686 return ix86_regparm;
1689 /* Value is the number of bytes of arguments automatically
1690 popped when returning from a subroutine call.
1691 FUNDECL is the declaration node of the function (as a tree),
1692 FUNTYPE is the data type of the function (as a tree),
1693 or for a library call it is an identifier node for the subroutine name.
1694 SIZE is the number of bytes of arguments passed on the stack.
1696 On the 80386, the RTD insn may be used to pop them if the number
1697 of args is fixed, but if the number is variable then the caller
1698 must pop them all. RTD can't be used for library calls now
1699 because the library is compiled with the Unix compiler.
1700 Use of RTD is a selectable option, since it is incompatible with
1701 standard Unix calling sequences. If the option is not selected,
1702 the caller must always pop the args.
1704 The attribute stdcall is equivalent to RTD on a per module basis. */
1707 ix86_return_pops_args (fundecl, funtype, size)
1712 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1714 /* Cdecl functions override -mrtd, and never pop the stack. */
1715 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1717 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1718 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1719 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1723 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1724 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1725 == void_type_node)))
1729 /* Lose any fake structure return argument if it is passed on the stack. */
1730 if (aggregate_value_p (TREE_TYPE (funtype))
1733 int nregs = ix86_fntype_regparm (funtype);
1736 return GET_MODE_SIZE (Pmode);
1742 /* Argument support functions. */
1744 /* Return true when register may be used to pass function parameters. */
1746 ix86_function_arg_regno_p (regno)
1751 return (regno < REGPARM_MAX
1752 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1753 if (SSE_REGNO_P (regno) && TARGET_SSE)
1755 /* RAX is used as hidden argument to va_arg functions. */
1758 for (i = 0; i < REGPARM_MAX; i++)
1759 if (regno == x86_64_int_parameter_registers[i])
1764 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1765 for a call to a function whose data type is FNTYPE.
1766 For a library call, FNTYPE is 0. */
1769 init_cumulative_args (cum, fntype, libname, fndecl)
1770 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1771 tree fntype; /* tree ptr for function decl */
1772 rtx libname; /* SYMBOL_REF of library name or 0 */
1775 static CUMULATIVE_ARGS zero_cum;
1776 tree param, next_param;
1777 bool user_convention = false;
1779 if (TARGET_DEBUG_ARG)
1781 fprintf (stderr, "\ninit_cumulative_args (");
1783 fprintf (stderr, "fntype code = %s, ret code = %s",
1784 tree_code_name[(int) TREE_CODE (fntype)],
1785 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1787 fprintf (stderr, "no fntype");
1790 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1795 /* Set up the number of registers to use for passing arguments. */
1796 cum->nregs = ix86_regparm;
1797 cum->sse_nregs = SSE_REGPARM_MAX;
1798 if (fntype && !TARGET_64BIT)
1800 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1804 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1805 user_convention = true;
1808 cum->maybe_vaarg = false;
1810 /* Use ecx and edx registers if function has fastcall attribute */
1811 if (fntype && !TARGET_64BIT)
1813 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1817 user_convention = true;
1821 /* Use register calling convention for local functions when possible. */
1822 if (!TARGET_64BIT && !user_convention && fndecl
1823 && flag_unit_at_a_time)
1825 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1828 /* We can't use regparm(3) for nested functions as these use
1829 static chain pointer in third argument. */
1830 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1838 /* Determine if this function has variable arguments. This is
1839 indicated by the last argument being 'void_type_mode' if there
1840 are no variable arguments. If there are variable arguments, then
1841 we won't pass anything in registers */
1845 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1846 param != 0; param = next_param)
1848 next_param = TREE_CHAIN (param);
1849 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1856 cum->maybe_vaarg = true;
1860 if ((!fntype && !libname)
1861 || (fntype && !TYPE_ARG_TYPES (fntype)))
1862 cum->maybe_vaarg = 1;
1864 if (TARGET_DEBUG_ARG)
1865 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1870 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1871 of this code is to classify each 8bytes of incoming argument by the register
1872 class and assign registers accordingly. */
1874 /* Return the union class of CLASS1 and CLASS2.
1875 See the x86-64 PS ABI for details. */
1877 static enum x86_64_reg_class
1878 merge_classes (class1, class2)
1879 enum x86_64_reg_class class1, class2;
1881 /* Rule #1: If both classes are equal, this is the resulting class. */
1882 if (class1 == class2)
1885 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1887 if (class1 == X86_64_NO_CLASS)
1889 if (class2 == X86_64_NO_CLASS)
1892 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1893 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1894 return X86_64_MEMORY_CLASS;
1896 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1897 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1898 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1899 return X86_64_INTEGERSI_CLASS;
1900 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1901 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1902 return X86_64_INTEGER_CLASS;
1904 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1905 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1906 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1907 return X86_64_MEMORY_CLASS;
1909 /* Rule #6: Otherwise class SSE is used. */
1910 return X86_64_SSE_CLASS;
1913 /* Classify the argument of type TYPE and mode MODE.
1914 CLASSES will be filled by the register class used to pass each word
1915 of the operand. The number of words is returned. In case the parameter
1916 should be passed in memory, 0 is returned. As a special case for zero
1917 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1919 BIT_OFFSET is used internally for handling records and specifies offset
1920 of the offset in bits modulo 256 to avoid overflow cases.
1922 See the x86-64 PS ABI for details.
1926 classify_argument (mode, type, classes, bit_offset)
1927 enum machine_mode mode;
1929 enum x86_64_reg_class classes[MAX_CLASSES];
1933 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1934 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1936 /* Variable sized entities are always passed/returned in memory. */
1940 if (mode != VOIDmode
1941 && MUST_PASS_IN_STACK (mode, type))
1944 if (type && AGGREGATE_TYPE_P (type))
1948 enum x86_64_reg_class subclasses[MAX_CLASSES];
1950 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1954 for (i = 0; i < words; i++)
1955 classes[i] = X86_64_NO_CLASS;
1957 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1958 signalize memory class, so handle it as special case. */
1961 classes[0] = X86_64_NO_CLASS;
1965 /* Classify each field of record and merge classes. */
1966 if (TREE_CODE (type) == RECORD_TYPE)
1968 /* For classes first merge in the field of the subclasses. */
1969 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1971 tree bases = TYPE_BINFO_BASETYPES (type);
1972 int n_bases = TREE_VEC_LENGTH (bases);
1975 for (i = 0; i < n_bases; ++i)
1977 tree binfo = TREE_VEC_ELT (bases, i);
1979 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1980 tree type = BINFO_TYPE (binfo);
1982 num = classify_argument (TYPE_MODE (type),
1984 (offset + bit_offset) % 256);
1987 for (i = 0; i < num; i++)
1989 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1991 merge_classes (subclasses[i], classes[i + pos]);
1995 /* And now merge the fields of structure. */
1996 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1998 if (TREE_CODE (field) == FIELD_DECL)
2002 /* Bitfields are always classified as integer. Handle them
2003 early, since later code would consider them to be
2004 misaligned integers. */
2005 if (DECL_BIT_FIELD (field))
2007 for (i = int_bit_position (field) / 8 / 8;
2008 i < (int_bit_position (field)
2009 + tree_low_cst (DECL_SIZE (field), 0)
2012 merge_classes (X86_64_INTEGER_CLASS,
2017 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2018 TREE_TYPE (field), subclasses,
2019 (int_bit_position (field)
2020 + bit_offset) % 256);
2023 for (i = 0; i < num; i++)
2026 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2028 merge_classes (subclasses[i], classes[i + pos]);
2034 /* Arrays are handled as small records. */
2035 else if (TREE_CODE (type) == ARRAY_TYPE)
2038 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2039 TREE_TYPE (type), subclasses, bit_offset);
2043 /* The partial classes are now full classes. */
2044 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2045 subclasses[0] = X86_64_SSE_CLASS;
2046 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2047 subclasses[0] = X86_64_INTEGER_CLASS;
2049 for (i = 0; i < words; i++)
2050 classes[i] = subclasses[i % num];
2052 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2053 else if (TREE_CODE (type) == UNION_TYPE
2054 || TREE_CODE (type) == QUAL_UNION_TYPE)
2056 /* For classes first merge in the field of the subclasses. */
2057 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2059 tree bases = TYPE_BINFO_BASETYPES (type);
2060 int n_bases = TREE_VEC_LENGTH (bases);
2063 for (i = 0; i < n_bases; ++i)
2065 tree binfo = TREE_VEC_ELT (bases, i);
2067 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2068 tree type = BINFO_TYPE (binfo);
2070 num = classify_argument (TYPE_MODE (type),
2072 (offset + (bit_offset % 64)) % 256);
2075 for (i = 0; i < num; i++)
2077 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2079 merge_classes (subclasses[i], classes[i + pos]);
2083 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2085 if (TREE_CODE (field) == FIELD_DECL)
2088 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2089 TREE_TYPE (field), subclasses,
2093 for (i = 0; i < num; i++)
2094 classes[i] = merge_classes (subclasses[i], classes[i]);
2101 /* Final merger cleanup. */
2102 for (i = 0; i < words; i++)
2104 /* If one class is MEMORY, everything should be passed in
2106 if (classes[i] == X86_64_MEMORY_CLASS)
2109 /* The X86_64_SSEUP_CLASS should be always preceded by
2110 X86_64_SSE_CLASS. */
2111 if (classes[i] == X86_64_SSEUP_CLASS
2112 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2113 classes[i] = X86_64_SSE_CLASS;
2115 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2116 if (classes[i] == X86_64_X87UP_CLASS
2117 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2118 classes[i] = X86_64_SSE_CLASS;
2123 /* Compute alignment needed. We align all types to natural boundaries with
2124 exception of XFmode that is aligned to 64bits. */
2125 if (mode != VOIDmode && mode != BLKmode)
2127 int mode_alignment = GET_MODE_BITSIZE (mode);
2130 mode_alignment = 128;
2131 else if (mode == XCmode)
2132 mode_alignment = 256;
2133 /* Misaligned fields are always returned in memory. */
2134 if (bit_offset % mode_alignment)
2138 /* Classification of atomic types. */
2148 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2149 classes[0] = X86_64_INTEGERSI_CLASS;
2151 classes[0] = X86_64_INTEGER_CLASS;
2155 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2158 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2159 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2162 if (!(bit_offset % 64))
2163 classes[0] = X86_64_SSESF_CLASS;
2165 classes[0] = X86_64_SSE_CLASS;
2168 classes[0] = X86_64_SSEDF_CLASS;
2171 classes[0] = X86_64_X87_CLASS;
2172 classes[1] = X86_64_X87UP_CLASS;
2175 classes[0] = X86_64_X87_CLASS;
2176 classes[1] = X86_64_X87UP_CLASS;
2177 classes[2] = X86_64_X87_CLASS;
2178 classes[3] = X86_64_X87UP_CLASS;
2181 classes[0] = X86_64_SSEDF_CLASS;
2182 classes[1] = X86_64_SSEDF_CLASS;
2185 classes[0] = X86_64_SSE_CLASS;
2193 classes[0] = X86_64_SSE_CLASS;
2194 classes[1] = X86_64_SSEUP_CLASS;
2209 /* Examine the argument and return set number of register required in each
2210 class. Return 0 iff parameter should be passed in memory. */
2212 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2213 enum machine_mode mode;
2215 int *int_nregs, *sse_nregs;
2218 enum x86_64_reg_class class[MAX_CLASSES];
2219 int n = classify_argument (mode, type, class, 0);
2225 for (n--; n >= 0; n--)
2228 case X86_64_INTEGER_CLASS:
2229 case X86_64_INTEGERSI_CLASS:
2232 case X86_64_SSE_CLASS:
2233 case X86_64_SSESF_CLASS:
2234 case X86_64_SSEDF_CLASS:
2237 case X86_64_NO_CLASS:
2238 case X86_64_SSEUP_CLASS:
2240 case X86_64_X87_CLASS:
2241 case X86_64_X87UP_CLASS:
2245 case X86_64_MEMORY_CLASS:
2250 /* Construct container for the argument used by GCC interface. See
2251 FUNCTION_ARG for the detailed description. */
2253 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2254 enum machine_mode mode;
2257 int nintregs, nsseregs;
2261 enum machine_mode tmpmode;
2263 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2264 enum x86_64_reg_class class[MAX_CLASSES];
2268 int needed_sseregs, needed_intregs;
2269 rtx exp[MAX_CLASSES];
2272 n = classify_argument (mode, type, class, 0);
2273 if (TARGET_DEBUG_ARG)
2276 fprintf (stderr, "Memory class\n");
2279 fprintf (stderr, "Classes:");
2280 for (i = 0; i < n; i++)
2282 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2284 fprintf (stderr, "\n");
2289 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2291 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2294 /* First construct simple cases. Avoid SCmode, since we want to use
2295 single register to pass this type. */
2296 if (n == 1 && mode != SCmode)
2299 case X86_64_INTEGER_CLASS:
2300 case X86_64_INTEGERSI_CLASS:
2301 return gen_rtx_REG (mode, intreg[0]);
2302 case X86_64_SSE_CLASS:
2303 case X86_64_SSESF_CLASS:
2304 case X86_64_SSEDF_CLASS:
2305 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2306 case X86_64_X87_CLASS:
2307 return gen_rtx_REG (mode, FIRST_STACK_REG);
2308 case X86_64_NO_CLASS:
2309 /* Zero sized array, struct or class. */
2314 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2315 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2317 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2318 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2319 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2320 && class[1] == X86_64_INTEGER_CLASS
2321 && (mode == CDImode || mode == TImode)
2322 && intreg[0] + 1 == intreg[1])
2323 return gen_rtx_REG (mode, intreg[0]);
2325 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2326 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2327 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2329 /* Otherwise figure out the entries of the PARALLEL. */
2330 for (i = 0; i < n; i++)
2334 case X86_64_NO_CLASS:
2336 case X86_64_INTEGER_CLASS:
2337 case X86_64_INTEGERSI_CLASS:
2338 /* Merge TImodes on aligned occasions here too. */
2339 if (i * 8 + 8 > bytes)
2340 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2341 else if (class[i] == X86_64_INTEGERSI_CLASS)
2345 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2346 if (tmpmode == BLKmode)
2348 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2349 gen_rtx_REG (tmpmode, *intreg),
2353 case X86_64_SSESF_CLASS:
2354 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2355 gen_rtx_REG (SFmode,
2356 SSE_REGNO (sse_regno)),
2360 case X86_64_SSEDF_CLASS:
2361 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2362 gen_rtx_REG (DFmode,
2363 SSE_REGNO (sse_regno)),
2367 case X86_64_SSE_CLASS:
2368 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2372 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2373 gen_rtx_REG (tmpmode,
2374 SSE_REGNO (sse_regno)),
2376 if (tmpmode == TImode)
2384 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2385 for (i = 0; i < nexps; i++)
2386 XVECEXP (ret, 0, i) = exp [i];
2390 /* Update the data in CUM to advance over an argument
2391 of mode MODE and data type TYPE.
2392 (TYPE is null for libcalls where that information may not be available.) */
2395 function_arg_advance (cum, mode, type, named)
2396 CUMULATIVE_ARGS *cum; /* current arg information */
2397 enum machine_mode mode; /* current arg mode */
2398 tree type; /* type of the argument or 0 if lib support */
2399 int named; /* whether or not the argument was named */
2402 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2403 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2405 if (TARGET_DEBUG_ARG)
2407 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2408 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2411 int int_nregs, sse_nregs;
2412 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2413 cum->words += words;
2414 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2416 cum->nregs -= int_nregs;
2417 cum->sse_nregs -= sse_nregs;
2418 cum->regno += int_nregs;
2419 cum->sse_regno += sse_nregs;
2422 cum->words += words;
2426 if (TARGET_SSE && mode == TImode)
2428 cum->sse_words += words;
2429 cum->sse_nregs -= 1;
2430 cum->sse_regno += 1;
2431 if (cum->sse_nregs <= 0)
2439 cum->words += words;
2440 cum->nregs -= words;
2441 cum->regno += words;
2443 if (cum->nregs <= 0)
2453 /* Define where to put the arguments to a function.
2454 Value is zero to push the argument on the stack,
2455 or a hard register in which to store the argument.
2457 MODE is the argument's machine mode.
2458 TYPE is the data type of the argument (as a tree).
2459 This is null for libcalls where that information may
2461 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2462 the preceding args and about the function being called.
2463 NAMED is nonzero if this argument is a named parameter
2464 (otherwise it is an extra parameter matching an ellipsis). */
2467 function_arg (cum, mode, type, named)
2468 CUMULATIVE_ARGS *cum; /* current arg information */
2469 enum machine_mode mode; /* current arg mode */
2470 tree type; /* type of the argument or 0 if lib support */
2471 int named; /* != 0 for normal args, == 0 for ... args */
2475 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2476 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2478 /* Handle a hidden AL argument containing number of registers for varargs
2479 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2481 if (mode == VOIDmode)
2484 return GEN_INT (cum->maybe_vaarg
2485 ? (cum->sse_nregs < 0
2493 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2494 &x86_64_int_parameter_registers [cum->regno],
2499 /* For now, pass fp/complex values on the stack. */
2511 if (words <= cum->nregs)
2513 int regno = cum->regno;
2515 /* Fastcall allocates the first two DWORD (SImode) or
2516 smaller arguments to ECX and EDX. */
2519 if (mode == BLKmode || mode == DImode)
2522 /* ECX not EAX is the first allocated register. */
2526 ret = gen_rtx_REG (mode, regno);
2531 ret = gen_rtx_REG (mode, cum->sse_regno);
2535 if (TARGET_DEBUG_ARG)
2538 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2539 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2542 print_simple_rtl (stderr, ret);
2544 fprintf (stderr, ", stack");
2546 fprintf (stderr, " )\n");
2552 /* A C expression that indicates when an argument must be passed by
2553 reference. If nonzero for an argument, a copy of that argument is
2554 made in memory and a pointer to the argument is passed instead of
2555 the argument itself. The pointer is passed in whatever way is
2556 appropriate for passing a pointer to that type. */
2559 function_arg_pass_by_reference (cum, mode, type, named)
2560 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2561 enum machine_mode mode ATTRIBUTE_UNUSED;
2563 int named ATTRIBUTE_UNUSED;
2568 if (type && int_size_in_bytes (type) == -1)
2570 if (TARGET_DEBUG_ARG)
2571 fprintf (stderr, "function_arg_pass_by_reference\n");
2578 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2581 contains_128bit_aligned_vector_p (type)
2584 enum machine_mode mode = TYPE_MODE (type);
2585 if (SSE_REG_MODE_P (mode)
2586 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2588 if (TYPE_ALIGN (type) < 128)
2591 if (AGGREGATE_TYPE_P (type))
2593 /* Walk the agregates recursivly. */
2594 if (TREE_CODE (type) == RECORD_TYPE
2595 || TREE_CODE (type) == UNION_TYPE
2596 || TREE_CODE (type) == QUAL_UNION_TYPE)
2600 if (TYPE_BINFO (type) != NULL
2601 && TYPE_BINFO_BASETYPES (type) != NULL)
2603 tree bases = TYPE_BINFO_BASETYPES (type);
2604 int n_bases = TREE_VEC_LENGTH (bases);
2607 for (i = 0; i < n_bases; ++i)
2609 tree binfo = TREE_VEC_ELT (bases, i);
2610 tree type = BINFO_TYPE (binfo);
2612 if (contains_128bit_aligned_vector_p (type))
2616 /* And now merge the fields of structure. */
2617 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2619 if (TREE_CODE (field) == FIELD_DECL
2620 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2624 /* Just for use if some languages passes arrays by value. */
2625 else if (TREE_CODE (type) == ARRAY_TYPE)
2627 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2636 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2640 ix86_function_arg_boundary (mode, type)
2641 enum machine_mode mode;
2646 align = TYPE_ALIGN (type);
2648 align = GET_MODE_ALIGNMENT (mode);
2649 if (align < PARM_BOUNDARY)
2650 align = PARM_BOUNDARY;
2653 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2654 make an exception for SSE modes since these require 128bit
2657 The handling here differs from field_alignment. ICC aligns MMX
2658 arguments to 4 byte boundaries, while structure fields are aligned
2659 to 8 byte boundaries. */
2662 if (!SSE_REG_MODE_P (mode))
2663 align = PARM_BOUNDARY;
2667 if (!contains_128bit_aligned_vector_p (type))
2668 align = PARM_BOUNDARY;
2670 if (align != PARM_BOUNDARY && !TARGET_SSE)
2678 /* Return true if N is a possible register number of function value. */
2680 ix86_function_value_regno_p (regno)
2685 return ((regno) == 0
2686 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2687 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2689 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2690 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2691 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2694 /* Define how to find the value returned by a function.
2695 VALTYPE is the data type of the value (as a tree).
2696 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2697 otherwise, FUNC is 0. */
2699 ix86_function_value (valtype)
2704 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2705 REGPARM_MAX, SSE_REGPARM_MAX,
2706 x86_64_int_return_registers, 0);
2707 /* For zero sized structures, construct_container return NULL, but we need
2708 to keep rest of compiler happy by returning meaningful value. */
2710 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2714 return gen_rtx_REG (TYPE_MODE (valtype),
2715 ix86_value_regno (TYPE_MODE (valtype)));
2718 /* Return false iff type is returned in memory. */
2720 ix86_return_in_memory (type)
2723 int needed_intregs, needed_sseregs;
2726 return !examine_argument (TYPE_MODE (type), type, 1,
2727 &needed_intregs, &needed_sseregs);
2731 if (TYPE_MODE (type) == BLKmode)
2733 else if (MS_AGGREGATE_RETURN
2734 && AGGREGATE_TYPE_P (type)
2735 && int_size_in_bytes(type) <= 8)
2737 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2738 && int_size_in_bytes (type) == 8)
2739 || (int_size_in_bytes (type) > 12
2740 && TYPE_MODE (type) != TImode
2741 && TYPE_MODE (type) != TFmode
2742 && !VECTOR_MODE_P (TYPE_MODE (type))))
2748 /* Define how to find the value returned by a library function
2749 assuming the value has mode MODE. */
2751 ix86_libcall_value (mode)
2752 enum machine_mode mode;
2762 return gen_rtx_REG (mode, FIRST_SSE_REG);
2765 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2767 return gen_rtx_REG (mode, 0);
2771 return gen_rtx_REG (mode, ix86_value_regno (mode));
2774 /* Given a mode, return the register to use for a return value. */
2777 ix86_value_regno (mode)
2778 enum machine_mode mode;
2780 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2781 return FIRST_FLOAT_REG;
2782 if (mode == TImode || VECTOR_MODE_P (mode))
2783 return FIRST_SSE_REG;
2787 /* Create the va_list data type. */
2790 ix86_build_va_list ()
2792 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2794 /* For i386 we use plain pointer to argument area. */
2796 return build_pointer_type (char_type_node);
2798 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2799 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2801 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2802 unsigned_type_node);
2803 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2804 unsigned_type_node);
2805 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2807 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2810 DECL_FIELD_CONTEXT (f_gpr) = record;
2811 DECL_FIELD_CONTEXT (f_fpr) = record;
2812 DECL_FIELD_CONTEXT (f_ovf) = record;
2813 DECL_FIELD_CONTEXT (f_sav) = record;
2815 TREE_CHAIN (record) = type_decl;
2816 TYPE_NAME (record) = type_decl;
2817 TYPE_FIELDS (record) = f_gpr;
2818 TREE_CHAIN (f_gpr) = f_fpr;
2819 TREE_CHAIN (f_fpr) = f_ovf;
2820 TREE_CHAIN (f_ovf) = f_sav;
2822 layout_type (record);
2824 /* The correct type is an array type of one element. */
2825 return build_array_type (record, build_index_type (size_zero_node));
2828 /* Perform any needed actions needed for a function that is receiving a
2829 variable number of arguments.
2833 MODE and TYPE are the mode and type of the current parameter.
2835 PRETEND_SIZE is a variable that should be set to the amount of stack
2836 that must be pushed by the prolog to pretend that our caller pushed
2839 Normally, this macro will push all remaining incoming registers on the
2840 stack and set PRETEND_SIZE to the length of the registers pushed. */
2843 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2844 CUMULATIVE_ARGS *cum;
2845 enum machine_mode mode;
2847 int *pretend_size ATTRIBUTE_UNUSED;
2851 CUMULATIVE_ARGS next_cum;
2852 rtx save_area = NULL_RTX, mem;
2865 /* Indicate to allocate space on the stack for varargs save area. */
2866 ix86_save_varrargs_registers = 1;
2868 cfun->stack_alignment_needed = 128;
2870 fntype = TREE_TYPE (current_function_decl);
2871 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2872 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2873 != void_type_node));
2875 /* For varargs, we do not want to skip the dummy va_dcl argument.
2876 For stdargs, we do want to skip the last named argument. */
2879 function_arg_advance (&next_cum, mode, type, 1);
2882 save_area = frame_pointer_rtx;
2884 set = get_varargs_alias_set ();
2886 for (i = next_cum.regno; i < ix86_regparm; i++)
2888 mem = gen_rtx_MEM (Pmode,
2889 plus_constant (save_area, i * UNITS_PER_WORD));
2890 set_mem_alias_set (mem, set);
2891 emit_move_insn (mem, gen_rtx_REG (Pmode,
2892 x86_64_int_parameter_registers[i]));
2895 if (next_cum.sse_nregs)
2897 /* Now emit code to save SSE registers. The AX parameter contains number
2898 of SSE parameter registers used to call this function. We use
2899 sse_prologue_save insn template that produces computed jump across
2900 SSE saves. We need some preparation work to get this working. */
2902 label = gen_label_rtx ();
2903 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2905 /* Compute address to jump to :
2906 label - 5*eax + nnamed_sse_arguments*5 */
2907 tmp_reg = gen_reg_rtx (Pmode);
2908 nsse_reg = gen_reg_rtx (Pmode);
2909 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2910 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2911 gen_rtx_MULT (Pmode, nsse_reg,
2913 if (next_cum.sse_regno)
2916 gen_rtx_CONST (DImode,
2917 gen_rtx_PLUS (DImode,
2919 GEN_INT (next_cum.sse_regno * 4))));
2921 emit_move_insn (nsse_reg, label_ref);
2922 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2924 /* Compute address of memory block we save into. We always use pointer
2925 pointing 127 bytes after first byte to store - this is needed to keep
2926 instruction size limited by 4 bytes. */
2927 tmp_reg = gen_reg_rtx (Pmode);
2928 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2929 plus_constant (save_area,
2930 8 * REGPARM_MAX + 127)));
2931 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2932 set_mem_alias_set (mem, set);
2933 set_mem_align (mem, BITS_PER_WORD);
2935 /* And finally do the dirty job! */
2936 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2937 GEN_INT (next_cum.sse_regno), label));
2942 /* Implement va_start. */
2945 ix86_va_start (valist, nextarg)
2949 HOST_WIDE_INT words, n_gpr, n_fpr;
2950 tree f_gpr, f_fpr, f_ovf, f_sav;
2951 tree gpr, fpr, ovf, sav, t;
2953 /* Only 64bit target needs something special. */
2956 std_expand_builtin_va_start (valist, nextarg);
2960 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2961 f_fpr = TREE_CHAIN (f_gpr);
2962 f_ovf = TREE_CHAIN (f_fpr);
2963 f_sav = TREE_CHAIN (f_ovf);
2965 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2966 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2967 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2968 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2969 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2971 /* Count number of gp and fp argument registers used. */
2972 words = current_function_args_info.words;
2973 n_gpr = current_function_args_info.regno;
2974 n_fpr = current_function_args_info.sse_regno;
2976 if (TARGET_DEBUG_ARG)
2977 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2978 (int) words, (int) n_gpr, (int) n_fpr);
2980 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2981 build_int_2 (n_gpr * 8, 0));
2982 TREE_SIDE_EFFECTS (t) = 1;
2983 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2985 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2986 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2987 TREE_SIDE_EFFECTS (t) = 1;
2988 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2990 /* Find the overflow area. */
2991 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2993 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2994 build_int_2 (words * UNITS_PER_WORD, 0));
2995 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2996 TREE_SIDE_EFFECTS (t) = 1;
2997 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2999 /* Find the register save area.
3000 Prologue of the function save it right above stack frame. */
3001 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3002 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3003 TREE_SIDE_EFFECTS (t) = 1;
3004 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3007 /* Implement va_arg. */
3009 ix86_va_arg (valist, type)
3012 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3013 tree f_gpr, f_fpr, f_ovf, f_sav;
3014 tree gpr, fpr, ovf, sav, t;
3016 rtx lab_false, lab_over = NULL_RTX;
3021 /* Only 64bit target needs something special. */
3024 return std_expand_builtin_va_arg (valist, type);
3027 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3028 f_fpr = TREE_CHAIN (f_gpr);
3029 f_ovf = TREE_CHAIN (f_fpr);
3030 f_sav = TREE_CHAIN (f_ovf);
3032 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3033 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3034 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3035 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3036 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3038 size = int_size_in_bytes (type);
3041 /* Passed by reference. */
3043 type = build_pointer_type (type);
3044 size = int_size_in_bytes (type);
3046 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3048 container = construct_container (TYPE_MODE (type), type, 0,
3049 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3051 * Pull the value out of the saved registers ...
3054 addr_rtx = gen_reg_rtx (Pmode);
3058 rtx int_addr_rtx, sse_addr_rtx;
3059 int needed_intregs, needed_sseregs;
3062 lab_over = gen_label_rtx ();
3063 lab_false = gen_label_rtx ();
3065 examine_argument (TYPE_MODE (type), type, 0,
3066 &needed_intregs, &needed_sseregs);
3069 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3070 || TYPE_ALIGN (type) > 128);
3072 /* In case we are passing structure, verify that it is consecutive block
3073 on the register save area. If not we need to do moves. */
3074 if (!need_temp && !REG_P (container))
3076 /* Verify that all registers are strictly consecutive */
3077 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3081 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3083 rtx slot = XVECEXP (container, 0, i);
3084 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3085 || INTVAL (XEXP (slot, 1)) != i * 16)
3093 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3095 rtx slot = XVECEXP (container, 0, i);
3096 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3097 || INTVAL (XEXP (slot, 1)) != i * 8)
3104 int_addr_rtx = addr_rtx;
3105 sse_addr_rtx = addr_rtx;
3109 int_addr_rtx = gen_reg_rtx (Pmode);
3110 sse_addr_rtx = gen_reg_rtx (Pmode);
3112 /* First ensure that we fit completely in registers. */
3115 emit_cmp_and_jump_insns (expand_expr
3116 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3117 GEN_INT ((REGPARM_MAX - needed_intregs +
3118 1) * 8), GE, const1_rtx, SImode,
3123 emit_cmp_and_jump_insns (expand_expr
3124 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3125 GEN_INT ((SSE_REGPARM_MAX -
3126 needed_sseregs + 1) * 16 +
3127 REGPARM_MAX * 8), GE, const1_rtx,
3128 SImode, 1, lab_false);
3131 /* Compute index to start of area used for integer regs. */
3134 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3135 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3136 if (r != int_addr_rtx)
3137 emit_move_insn (int_addr_rtx, r);
3141 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3142 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3143 if (r != sse_addr_rtx)
3144 emit_move_insn (sse_addr_rtx, r);
3151 /* Never use the memory itself, as it has the alias set. */
3152 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3153 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3154 set_mem_alias_set (mem, get_varargs_alias_set ());
3155 set_mem_align (mem, BITS_PER_UNIT);
3157 for (i = 0; i < XVECLEN (container, 0); i++)
3159 rtx slot = XVECEXP (container, 0, i);
3160 rtx reg = XEXP (slot, 0);
3161 enum machine_mode mode = GET_MODE (reg);
3167 if (SSE_REGNO_P (REGNO (reg)))
3169 src_addr = sse_addr_rtx;
3170 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3174 src_addr = int_addr_rtx;
3175 src_offset = REGNO (reg) * 8;
3177 src_mem = gen_rtx_MEM (mode, src_addr);
3178 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3179 src_mem = adjust_address (src_mem, mode, src_offset);
3180 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3181 emit_move_insn (dest_mem, src_mem);
3188 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3189 build_int_2 (needed_intregs * 8, 0));
3190 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3191 TREE_SIDE_EFFECTS (t) = 1;
3192 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3197 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3198 build_int_2 (needed_sseregs * 16, 0));
3199 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3200 TREE_SIDE_EFFECTS (t) = 1;
3201 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3204 emit_jump_insn (gen_jump (lab_over));
3206 emit_label (lab_false);
3209 /* ... otherwise out of the overflow area. */
3211 /* Care for on-stack alignment if needed. */
3212 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3216 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3217 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3218 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3222 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3224 emit_move_insn (addr_rtx, r);
3227 build (PLUS_EXPR, TREE_TYPE (t), t,
3228 build_int_2 (rsize * UNITS_PER_WORD, 0));
3229 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3230 TREE_SIDE_EFFECTS (t) = 1;
3231 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3234 emit_label (lab_over);
3238 r = gen_rtx_MEM (Pmode, addr_rtx);
3239 set_mem_alias_set (r, get_varargs_alias_set ());
3240 emit_move_insn (addr_rtx, r);
3246 /* Return nonzero if OP is either a i387 or SSE fp register. */
3248 any_fp_register_operand (op, mode)
3250 enum machine_mode mode ATTRIBUTE_UNUSED;
3252 return ANY_FP_REG_P (op);
3255 /* Return nonzero if OP is an i387 fp register. */
3257 fp_register_operand (op, mode)
3259 enum machine_mode mode ATTRIBUTE_UNUSED;
3261 return FP_REG_P (op);
3264 /* Return nonzero if OP is a non-fp register_operand. */
3266 register_and_not_any_fp_reg_operand (op, mode)
3268 enum machine_mode mode;
3270 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3273 /* Return nonzero if OP is a register operand other than an
3274 i387 fp register. */
3276 register_and_not_fp_reg_operand (op, mode)
3278 enum machine_mode mode;
3280 return register_operand (op, mode) && !FP_REG_P (op);
3283 /* Return nonzero if OP is general operand representable on x86_64. */
3286 x86_64_general_operand (op, mode)
3288 enum machine_mode mode;
3291 return general_operand (op, mode);
3292 if (nonimmediate_operand (op, mode))
3294 return x86_64_sign_extended_value (op);
3297 /* Return nonzero if OP is general operand representable on x86_64
3298 as either sign extended or zero extended constant. */
3301 x86_64_szext_general_operand (op, mode)
3303 enum machine_mode mode;
3306 return general_operand (op, mode);
3307 if (nonimmediate_operand (op, mode))
3309 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3312 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3315 x86_64_nonmemory_operand (op, mode)
3317 enum machine_mode mode;
3320 return nonmemory_operand (op, mode);
3321 if (register_operand (op, mode))
3323 return x86_64_sign_extended_value (op);
3326 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3329 x86_64_movabs_operand (op, mode)
3331 enum machine_mode mode;
3333 if (!TARGET_64BIT || !flag_pic)
3334 return nonmemory_operand (op, mode);
3335 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3337 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3342 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3345 x86_64_szext_nonmemory_operand (op, mode)
3347 enum machine_mode mode;
3350 return nonmemory_operand (op, mode);
3351 if (register_operand (op, mode))
3353 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3356 /* Return nonzero if OP is immediate operand representable on x86_64. */
3359 x86_64_immediate_operand (op, mode)
3361 enum machine_mode mode;
3364 return immediate_operand (op, mode);
3365 return x86_64_sign_extended_value (op);
3368 /* Return nonzero if OP is immediate operand representable on x86_64. */
3371 x86_64_zext_immediate_operand (op, mode)
3373 enum machine_mode mode ATTRIBUTE_UNUSED;
3375 return x86_64_zero_extended_value (op);
3378 /* Return nonzero if OP is (const_int 1), else return zero. */
3381 const_int_1_operand (op, mode)
3383 enum machine_mode mode ATTRIBUTE_UNUSED;
3385 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3388 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3389 for shift & compare patterns, as shifting by 0 does not change flags),
3390 else return zero. */
3393 const_int_1_31_operand (op, mode)
3395 enum machine_mode mode ATTRIBUTE_UNUSED;
3397 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3400 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3401 reference and a constant. */
3404 symbolic_operand (op, mode)
3406 enum machine_mode mode ATTRIBUTE_UNUSED;
3408 switch (GET_CODE (op))
3416 if (GET_CODE (op) == SYMBOL_REF
3417 || GET_CODE (op) == LABEL_REF
3418 || (GET_CODE (op) == UNSPEC
3419 && (XINT (op, 1) == UNSPEC_GOT
3420 || XINT (op, 1) == UNSPEC_GOTOFF
3421 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3423 if (GET_CODE (op) != PLUS
3424 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3428 if (GET_CODE (op) == SYMBOL_REF
3429 || GET_CODE (op) == LABEL_REF)
3431 /* Only @GOTOFF gets offsets. */
3432 if (GET_CODE (op) != UNSPEC
3433 || XINT (op, 1) != UNSPEC_GOTOFF)
3436 op = XVECEXP (op, 0, 0);
3437 if (GET_CODE (op) == SYMBOL_REF
3438 || GET_CODE (op) == LABEL_REF)
3447 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3450 pic_symbolic_operand (op, mode)
3452 enum machine_mode mode ATTRIBUTE_UNUSED;
3454 if (GET_CODE (op) != CONST)
3459 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3464 if (GET_CODE (op) == UNSPEC)
3466 if (GET_CODE (op) != PLUS
3467 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3470 if (GET_CODE (op) == UNSPEC)
3476 /* Return true if OP is a symbolic operand that resolves locally. */
3479 local_symbolic_operand (op, mode)
3481 enum machine_mode mode ATTRIBUTE_UNUSED;
3483 if (GET_CODE (op) == CONST
3484 && GET_CODE (XEXP (op, 0)) == PLUS
3485 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3486 op = XEXP (XEXP (op, 0), 0);
3488 if (GET_CODE (op) == LABEL_REF)
3491 if (GET_CODE (op) != SYMBOL_REF)
3494 /* These we've been told are local by varasm and encode_section_info
3496 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3499 /* There is, however, a not insubstantial body of code in the rest of
3500 the compiler that assumes it can just stick the results of
3501 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3502 /* ??? This is a hack. Should update the body of the compiler to
3503 always create a DECL an invoke targetm.encode_section_info. */
3504 if (strncmp (XSTR (op, 0), internal_label_prefix,
3505 internal_label_prefix_len) == 0)
3511 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3514 tls_symbolic_operand (op, mode)
3516 enum machine_mode mode ATTRIBUTE_UNUSED;
3518 const char *symbol_str;
3520 if (GET_CODE (op) != SYMBOL_REF)
3522 symbol_str = XSTR (op, 0);
3524 if (symbol_str[0] != '%')
3526 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3530 tls_symbolic_operand_1 (op, kind)
3532 enum tls_model kind;
3534 const char *symbol_str;
3536 if (GET_CODE (op) != SYMBOL_REF)
3538 symbol_str = XSTR (op, 0);
3540 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3544 global_dynamic_symbolic_operand (op, mode)
3546 enum machine_mode mode ATTRIBUTE_UNUSED;
3548 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3552 local_dynamic_symbolic_operand (op, mode)
3554 enum machine_mode mode ATTRIBUTE_UNUSED;
3556 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3560 initial_exec_symbolic_operand (op, mode)
3562 enum machine_mode mode ATTRIBUTE_UNUSED;
3564 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3568 local_exec_symbolic_operand (op, mode)
3570 enum machine_mode mode ATTRIBUTE_UNUSED;
3572 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3575 /* Test for a valid operand for a call instruction. Don't allow the
3576 arg pointer register or virtual regs since they may decay into
3577 reg + const, which the patterns can't handle. */
3580 call_insn_operand (op, mode)
3582 enum machine_mode mode ATTRIBUTE_UNUSED;
3584 /* Disallow indirect through a virtual register. This leads to
3585 compiler aborts when trying to eliminate them. */
3586 if (GET_CODE (op) == REG
3587 && (op == arg_pointer_rtx
3588 || op == frame_pointer_rtx
3589 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3590 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3593 /* Disallow `call 1234'. Due to varying assembler lameness this
3594 gets either rejected or translated to `call .+1234'. */
3595 if (GET_CODE (op) == CONST_INT)
3598 /* Explicitly allow SYMBOL_REF even if pic. */
3599 if (GET_CODE (op) == SYMBOL_REF)
3602 /* Otherwise we can allow any general_operand in the address. */
3603 return general_operand (op, Pmode);
3606 /* Test for a valid operand for a call instruction. Don't allow the
3607 arg pointer register or virtual regs since they may decay into
3608 reg + const, which the patterns can't handle. */
3611 sibcall_insn_operand (op, mode)
3613 enum machine_mode mode ATTRIBUTE_UNUSED;
3615 /* Disallow indirect through a virtual register. This leads to
3616 compiler aborts when trying to eliminate them. */
3617 if (GET_CODE (op) == REG
3618 && (op == arg_pointer_rtx
3619 || op == frame_pointer_rtx
3620 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3621 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3624 /* Explicitly allow SYMBOL_REF even if pic. */
3625 if (GET_CODE (op) == SYMBOL_REF)
3628 /* Otherwise we can only allow register operands. */
3629 return register_operand (op, Pmode);
3633 constant_call_address_operand (op, mode)
3635 enum machine_mode mode ATTRIBUTE_UNUSED;
3637 if (GET_CODE (op) == CONST
3638 && GET_CODE (XEXP (op, 0)) == PLUS
3639 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3640 op = XEXP (XEXP (op, 0), 0);
3641 return GET_CODE (op) == SYMBOL_REF;
3644 /* Match exactly zero and one. */
3647 const0_operand (op, mode)
3649 enum machine_mode mode;
3651 return op == CONST0_RTX (mode);
3655 const1_operand (op, mode)
3657 enum machine_mode mode ATTRIBUTE_UNUSED;
3659 return op == const1_rtx;
3662 /* Match 2, 4, or 8. Used for leal multiplicands. */
3665 const248_operand (op, mode)
3667 enum machine_mode mode ATTRIBUTE_UNUSED;
3669 return (GET_CODE (op) == CONST_INT
3670 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3673 /* True if this is a constant appropriate for an increment or decrement. */
3676 incdec_operand (op, mode)
3678 enum machine_mode mode ATTRIBUTE_UNUSED;
3680 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3681 registers, since carry flag is not set. */
3682 if (TARGET_PENTIUM4 && !optimize_size)
3684 return op == const1_rtx || op == constm1_rtx;
3687 /* Return nonzero if OP is acceptable as operand of DImode shift
3691 shiftdi_operand (op, mode)
3693 enum machine_mode mode ATTRIBUTE_UNUSED;
3696 return nonimmediate_operand (op, mode);
3698 return register_operand (op, mode);
3701 /* Return false if this is the stack pointer, or any other fake
3702 register eliminable to the stack pointer. Otherwise, this is
3705 This is used to prevent esp from being used as an index reg.
3706 Which would only happen in pathological cases. */
3709 reg_no_sp_operand (op, mode)
3711 enum machine_mode mode;
3714 if (GET_CODE (t) == SUBREG)
3716 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3719 return register_operand (op, mode);
3723 mmx_reg_operand (op, mode)
3725 enum machine_mode mode ATTRIBUTE_UNUSED;
3727 return MMX_REG_P (op);
3730 /* Return false if this is any eliminable register. Otherwise
3734 general_no_elim_operand (op, mode)
3736 enum machine_mode mode;
3739 if (GET_CODE (t) == SUBREG)
3741 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3742 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3743 || t == virtual_stack_dynamic_rtx)
3746 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3747 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3750 return general_operand (op, mode);
3753 /* Return false if this is any eliminable register. Otherwise
3754 register_operand or const_int. */
3757 nonmemory_no_elim_operand (op, mode)
3759 enum machine_mode mode;
3762 if (GET_CODE (t) == SUBREG)
3764 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3765 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3766 || t == virtual_stack_dynamic_rtx)
3769 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3772 /* Return false if this is any eliminable register or stack register,
3773 otherwise work like register_operand. */
3776 index_register_operand (op, mode)
3778 enum machine_mode mode;
3781 if (GET_CODE (t) == SUBREG)
3785 if (t == arg_pointer_rtx
3786 || t == frame_pointer_rtx
3787 || t == virtual_incoming_args_rtx
3788 || t == virtual_stack_vars_rtx
3789 || t == virtual_stack_dynamic_rtx
3790 || REGNO (t) == STACK_POINTER_REGNUM)
3793 return general_operand (op, mode);
3796 /* Return true if op is a Q_REGS class register. */
3799 q_regs_operand (op, mode)
3801 enum machine_mode mode;
3803 if (mode != VOIDmode && GET_MODE (op) != mode)
3805 if (GET_CODE (op) == SUBREG)
3806 op = SUBREG_REG (op);
3807 return ANY_QI_REG_P (op);
3810 /* Return true if op is an flags register. */
3813 flags_reg_operand (op, mode)
3815 enum machine_mode mode;
3817 if (mode != VOIDmode && GET_MODE (op) != mode)
3819 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3822 /* Return true if op is a NON_Q_REGS class register. */
3825 non_q_regs_operand (op, mode)
3827 enum machine_mode mode;
3829 if (mode != VOIDmode && GET_MODE (op) != mode)
3831 if (GET_CODE (op) == SUBREG)
3832 op = SUBREG_REG (op);
3833 return NON_QI_REG_P (op);
3837 zero_extended_scalar_load_operand (op, mode)
3839 enum machine_mode mode ATTRIBUTE_UNUSED;
3842 if (GET_CODE (op) != MEM)
3844 op = maybe_get_pool_constant (op);
3847 if (GET_CODE (op) != CONST_VECTOR)
3850 (GET_MODE_SIZE (GET_MODE (op)) /
3851 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3852 for (n_elts--; n_elts > 0; n_elts--)
3854 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3855 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3861 /* Return 1 when OP is operand acceptable for standard SSE move. */
3863 vector_move_operand (op, mode)
3865 enum machine_mode mode;
3867 if (nonimmediate_operand (op, mode))
3869 if (GET_MODE (op) != mode && mode != VOIDmode)
3871 return (op == CONST0_RTX (GET_MODE (op)));
3874 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3877 sse_comparison_operator (op, mode)
3879 enum machine_mode mode ATTRIBUTE_UNUSED;
3881 enum rtx_code code = GET_CODE (op);
3884 /* Operations supported directly. */
3894 /* These are equivalent to ones above in non-IEEE comparisons. */
3901 return !TARGET_IEEE_FP;
3906 /* Return 1 if OP is a valid comparison operator in valid mode. */
3908 ix86_comparison_operator (op, mode)
3910 enum machine_mode mode;
3912 enum machine_mode inmode;
3913 enum rtx_code code = GET_CODE (op);
3914 if (mode != VOIDmode && GET_MODE (op) != mode)
3916 if (GET_RTX_CLASS (code) != '<')
3918 inmode = GET_MODE (XEXP (op, 0));
3920 if (inmode == CCFPmode || inmode == CCFPUmode)
3922 enum rtx_code second_code, bypass_code;
3923 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3924 return (bypass_code == NIL && second_code == NIL);
3931 if (inmode == CCmode || inmode == CCGCmode
3932 || inmode == CCGOCmode || inmode == CCNOmode)
3935 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3936 if (inmode == CCmode)
3940 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3948 /* Return 1 if OP is a valid comparison operator testing carry flag
3951 ix86_carry_flag_operator (op, mode)
3953 enum machine_mode mode;
3955 enum machine_mode inmode;
3956 enum rtx_code code = GET_CODE (op);
3958 if (mode != VOIDmode && GET_MODE (op) != mode)
3960 if (GET_RTX_CLASS (code) != '<')
3962 inmode = GET_MODE (XEXP (op, 0));
3963 if (GET_CODE (XEXP (op, 0)) != REG
3964 || REGNO (XEXP (op, 0)) != 17
3965 || XEXP (op, 1) != const0_rtx)
3968 if (inmode == CCFPmode || inmode == CCFPUmode)
3970 enum rtx_code second_code, bypass_code;
3972 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3973 if (bypass_code != NIL || second_code != NIL)
3975 code = ix86_fp_compare_code_to_integer (code);
3977 else if (inmode != CCmode)
3982 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3985 fcmov_comparison_operator (op, mode)
3987 enum machine_mode mode;
3989 enum machine_mode inmode;
3990 enum rtx_code code = GET_CODE (op);
3992 if (mode != VOIDmode && GET_MODE (op) != mode)
3994 if (GET_RTX_CLASS (code) != '<')
3996 inmode = GET_MODE (XEXP (op, 0));
3997 if (inmode == CCFPmode || inmode == CCFPUmode)
3999 enum rtx_code second_code, bypass_code;
4001 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4002 if (bypass_code != NIL || second_code != NIL)
4004 code = ix86_fp_compare_code_to_integer (code);
4006 /* i387 supports just limited amount of conditional codes. */
4009 case LTU: case GTU: case LEU: case GEU:
4010 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4013 case ORDERED: case UNORDERED:
4021 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4024 promotable_binary_operator (op, mode)
4026 enum machine_mode mode ATTRIBUTE_UNUSED;
4028 switch (GET_CODE (op))
4031 /* Modern CPUs have same latency for HImode and SImode multiply,
4032 but 386 and 486 do HImode multiply faster. */
4033 return ix86_tune > PROCESSOR_I486;
4045 /* Nearly general operand, but accept any const_double, since we wish
4046 to be able to drop them into memory rather than have them get pulled
4050 cmp_fp_expander_operand (op, mode)
4052 enum machine_mode mode;
4054 if (mode != VOIDmode && mode != GET_MODE (op))
4056 if (GET_CODE (op) == CONST_DOUBLE)
4058 return general_operand (op, mode);
4061 /* Match an SI or HImode register for a zero_extract. */
4064 ext_register_operand (op, mode)
4066 enum machine_mode mode ATTRIBUTE_UNUSED;
4069 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4070 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4073 if (!register_operand (op, VOIDmode))
4076 /* Be careful to accept only registers having upper parts. */
4077 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4078 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4081 /* Return 1 if this is a valid binary floating-point operation.
4082 OP is the expression matched, and MODE is its mode. */
4085 binary_fp_operator (op, mode)
4087 enum machine_mode mode;
4089 if (mode != VOIDmode && mode != GET_MODE (op))
4092 switch (GET_CODE (op))
4098 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4106 mult_operator (op, mode)
4108 enum machine_mode mode ATTRIBUTE_UNUSED;
4110 return GET_CODE (op) == MULT;
4114 div_operator (op, mode)
4116 enum machine_mode mode ATTRIBUTE_UNUSED;
4118 return GET_CODE (op) == DIV;
4122 arith_or_logical_operator (op, mode)
4124 enum machine_mode mode;
4126 return ((mode == VOIDmode || GET_MODE (op) == mode)
4127 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4128 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4131 /* Returns 1 if OP is memory operand with a displacement. */
4134 memory_displacement_operand (op, mode)
4136 enum machine_mode mode;
4138 struct ix86_address parts;
4140 if (! memory_operand (op, mode))
4143 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4146 return parts.disp != NULL_RTX;
4149 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4150 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4152 ??? It seems likely that this will only work because cmpsi is an
4153 expander, and no actual insns use this. */
4156 cmpsi_operand (op, mode)
4158 enum machine_mode mode;
4160 if (nonimmediate_operand (op, mode))
4163 if (GET_CODE (op) == AND
4164 && GET_MODE (op) == SImode
4165 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4166 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4167 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4168 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4169 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4170 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4176 /* Returns 1 if OP is memory operand that can not be represented by the
4180 long_memory_operand (op, mode)
4182 enum machine_mode mode;
4184 if (! memory_operand (op, mode))
4187 return memory_address_length (op) != 0;
4190 /* Return nonzero if the rtx is known aligned. */
4193 aligned_operand (op, mode)
4195 enum machine_mode mode;
4197 struct ix86_address parts;
4199 if (!general_operand (op, mode))
4202 /* Registers and immediate operands are always "aligned". */
4203 if (GET_CODE (op) != MEM)
4206 /* Don't even try to do any aligned optimizations with volatiles. */
4207 if (MEM_VOLATILE_P (op))
4212 /* Pushes and pops are only valid on the stack pointer. */
4213 if (GET_CODE (op) == PRE_DEC
4214 || GET_CODE (op) == POST_INC)
4217 /* Decode the address. */
4218 if (! ix86_decompose_address (op, &parts))
4221 if (parts.base && GET_CODE (parts.base) == SUBREG)
4222 parts.base = SUBREG_REG (parts.base);
4223 if (parts.index && GET_CODE (parts.index) == SUBREG)
4224 parts.index = SUBREG_REG (parts.index);
4226 /* Look for some component that isn't known to be aligned. */
4230 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4235 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4240 if (GET_CODE (parts.disp) != CONST_INT
4241 || (INTVAL (parts.disp) & 3) != 0)
4245 /* Didn't find one -- this must be an aligned address. */
4249 /* Initialize the table of extra 80387 mathematical constants. */
4252 init_ext_80387_constants ()
4254 static const char * cst[5] =
4256 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4257 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4258 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4259 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4260 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4264 for (i = 0; i < 5; i++)
4266 real_from_string (&ext_80387_constants_table[i], cst[i]);
4267 /* Ensure each constant is rounded to XFmode precision. */
4268 real_convert (&ext_80387_constants_table[i], XFmode,
4269 &ext_80387_constants_table[i]);
4272 ext_80387_constants_init = 1;
4275 /* Return true if the constant is something that can be loaded with
4276 a special instruction. */
4279 standard_80387_constant_p (x)
4282 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4285 if (x == CONST0_RTX (GET_MODE (x)))
4287 if (x == CONST1_RTX (GET_MODE (x)))
4290 /* For XFmode constants, try to find a special 80387 instruction on
4291 those CPUs that benefit from them. */
4292 if (GET_MODE (x) == XFmode
4293 && x86_ext_80387_constants & TUNEMASK)
4298 if (! ext_80387_constants_init)
4299 init_ext_80387_constants ();
4301 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4302 for (i = 0; i < 5; i++)
4303 if (real_identical (&r, &ext_80387_constants_table[i]))
4310 /* Return the opcode of the special instruction to be used to load
4314 standard_80387_constant_opcode (x)
4317 switch (standard_80387_constant_p (x))
4337 /* Return the CONST_DOUBLE representing the 80387 constant that is
4338 loaded by the specified special instruction. The argument IDX
4339 matches the return value from standard_80387_constant_p. */
4342 standard_80387_constant_rtx (idx)
4347 if (! ext_80387_constants_init)
4348 init_ext_80387_constants ();
4364 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
4367 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4370 standard_sse_constant_p (x)
4373 if (x == const0_rtx)
4375 return (x == CONST0_RTX (GET_MODE (x)));
4378 /* Returns 1 if OP contains a symbol reference */
4381 symbolic_reference_mentioned_p (op)
4384 register const char *fmt;
4387 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4390 fmt = GET_RTX_FORMAT (GET_CODE (op));
4391 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4397 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4398 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4402 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4409 /* Return 1 if it is appropriate to emit `ret' instructions in the
4410 body of a function. Do this only if the epilogue is simple, needing a
4411 couple of insns. Prior to reloading, we can't tell how many registers
4412 must be saved, so return 0 then. Return 0 if there is no frame
4413 marker to de-allocate.
4415 If NON_SAVING_SETJMP is defined and true, then it is not possible
4416 for the epilogue to be simple, so return 0. This is a special case
4417 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4418 until final, but jump_optimize may need to know sooner if a
4422 ix86_can_use_return_insn_p ()
4424 struct ix86_frame frame;
4426 #ifdef NON_SAVING_SETJMP
4427 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4431 if (! reload_completed || frame_pointer_needed)
4434 /* Don't allow more than 32 pop, since that's all we can do
4435 with one instruction. */
4436 if (current_function_pops_args
4437 && current_function_args_size >= 32768)
4440 ix86_compute_frame_layout (&frame);
4441 return frame.to_allocate == 0 && frame.nregs == 0;
4444 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4446 x86_64_sign_extended_value (value)
4449 switch (GET_CODE (value))
4451 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4452 to be at least 32 and this all acceptable constants are
4453 represented as CONST_INT. */
4455 if (HOST_BITS_PER_WIDE_INT == 32)
4459 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4460 return trunc_int_for_mode (val, SImode) == val;
4464 /* For certain code models, the symbolic references are known to fit.
4465 in CM_SMALL_PIC model we know it fits if it is local to the shared
4466 library. Don't count TLS SYMBOL_REFs here, since they should fit
4467 only if inside of UNSPEC handled below. */
4469 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4471 /* For certain code models, the code is near as well. */
4473 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4474 || ix86_cmodel == CM_KERNEL);
4476 /* We also may accept the offsetted memory references in certain special
4479 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4480 switch (XINT (XEXP (value, 0), 1))
4482 case UNSPEC_GOTPCREL:
4484 case UNSPEC_GOTNTPOFF:
4490 if (GET_CODE (XEXP (value, 0)) == PLUS)
4492 rtx op1 = XEXP (XEXP (value, 0), 0);
4493 rtx op2 = XEXP (XEXP (value, 0), 1);
4494 HOST_WIDE_INT offset;
4496 if (ix86_cmodel == CM_LARGE)
4498 if (GET_CODE (op2) != CONST_INT)
4500 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4501 switch (GET_CODE (op1))
4504 /* For CM_SMALL assume that latest object is 16MB before
4505 end of 31bits boundary. We may also accept pretty
4506 large negative constants knowing that all objects are
4507 in the positive half of address space. */
4508 if (ix86_cmodel == CM_SMALL
4509 && offset < 16*1024*1024
4510 && trunc_int_for_mode (offset, SImode) == offset)
4512 /* For CM_KERNEL we know that all object resist in the
4513 negative half of 32bits address space. We may not
4514 accept negative offsets, since they may be just off
4515 and we may accept pretty large positive ones. */
4516 if (ix86_cmodel == CM_KERNEL
4518 && trunc_int_for_mode (offset, SImode) == offset)
4522 /* These conditions are similar to SYMBOL_REF ones, just the
4523 constraints for code models differ. */
4524 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4525 && offset < 16*1024*1024
4526 && trunc_int_for_mode (offset, SImode) == offset)
4528 if (ix86_cmodel == CM_KERNEL
4530 && trunc_int_for_mode (offset, SImode) == offset)
4534 switch (XINT (op1, 1))
4539 && trunc_int_for_mode (offset, SImode) == offset)
4553 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4555 x86_64_zero_extended_value (value)
4558 switch (GET_CODE (value))
4561 if (HOST_BITS_PER_WIDE_INT == 32)
4562 return (GET_MODE (value) == VOIDmode
4563 && !CONST_DOUBLE_HIGH (value));
4567 if (HOST_BITS_PER_WIDE_INT == 32)
4568 return INTVAL (value) >= 0;
4570 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4573 /* For certain code models, the symbolic references are known to fit. */
4575 return ix86_cmodel == CM_SMALL;
4577 /* For certain code models, the code is near as well. */
4579 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4581 /* We also may accept the offsetted memory references in certain special
4584 if (GET_CODE (XEXP (value, 0)) == PLUS)
4586 rtx op1 = XEXP (XEXP (value, 0), 0);
4587 rtx op2 = XEXP (XEXP (value, 0), 1);
4589 if (ix86_cmodel == CM_LARGE)
4591 switch (GET_CODE (op1))
4595 /* For small code model we may accept pretty large positive
4596 offsets, since one bit is available for free. Negative
4597 offsets are limited by the size of NULL pointer area
4598 specified by the ABI. */
4599 if (ix86_cmodel == CM_SMALL
4600 && GET_CODE (op2) == CONST_INT
4601 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4602 && (trunc_int_for_mode (INTVAL (op2), SImode)
4605 /* ??? For the kernel, we may accept adjustment of
4606 -0x10000000, since we know that it will just convert
4607 negative address space to positive, but perhaps this
4608 is not worthwhile. */
4611 /* These conditions are similar to SYMBOL_REF ones, just the
4612 constraints for code models differ. */
4613 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4614 && GET_CODE (op2) == CONST_INT
4615 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4616 && (trunc_int_for_mode (INTVAL (op2), SImode)
4630 /* Value should be nonzero if functions must have frame pointers.
4631 Zero means the frame pointer need not be set up (and parms may
4632 be accessed via the stack pointer) in functions that seem suitable. */
4635 ix86_frame_pointer_required ()
4637 /* If we accessed previous frames, then the generated code expects
4638 to be able to access the saved ebp value in our frame. */
4639 if (cfun->machine->accesses_prev_frame)
4642 /* Several x86 os'es need a frame pointer for other reasons,
4643 usually pertaining to setjmp. */
4644 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4647 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4648 the frame pointer by default. Turn it back on now if we've not
4649 got a leaf function. */
4650 if (TARGET_OMIT_LEAF_FRAME_POINTER
4651 && (!current_function_is_leaf))
4654 if (current_function_profile)
4660 /* Record that the current function accesses previous call frames. */
4663 ix86_setup_frame_addresses ()
4665 cfun->machine->accesses_prev_frame = 1;
4668 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4669 # define USE_HIDDEN_LINKONCE 1
4671 # define USE_HIDDEN_LINKONCE 0
4674 static int pic_labels_used;
4676 /* Fills in the label name that should be used for a pc thunk for
4677 the given register. */
4680 get_pc_thunk_name (name, regno)
4684 if (USE_HIDDEN_LINKONCE)
4685 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4687 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4691 /* This function generates code for -fpic that loads %ebx with
4692 the return address of the caller and then returns. */
4695 ix86_asm_file_end (file)
4701 for (regno = 0; regno < 8; ++regno)
4705 if (! ((pic_labels_used >> regno) & 1))
4708 get_pc_thunk_name (name, regno);
4710 if (USE_HIDDEN_LINKONCE)
4714 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4716 TREE_PUBLIC (decl) = 1;
4717 TREE_STATIC (decl) = 1;
4718 DECL_ONE_ONLY (decl) = 1;
4720 (*targetm.asm_out.unique_section) (decl, 0);
4721 named_section (decl, NULL, 0);
4723 (*targetm.asm_out.globalize_label) (file, name);
4724 fputs ("\t.hidden\t", file);
4725 assemble_name (file, name);
4727 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4732 ASM_OUTPUT_LABEL (file, name);
4735 xops[0] = gen_rtx_REG (SImode, regno);
4736 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4737 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4738 output_asm_insn ("ret", xops);
4742 /* Emit code for the SET_GOT patterns. */
4745 output_set_got (dest)
4751 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4753 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4755 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4758 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4760 output_asm_insn ("call\t%a2", xops);
4763 /* Output the "canonical" label name ("Lxx$pb") here too. This
4764 is what will be referred to by the Mach-O PIC subsystem. */
4765 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4767 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4768 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4771 output_asm_insn ("pop{l}\t%0", xops);
4776 get_pc_thunk_name (name, REGNO (dest));
4777 pic_labels_used |= 1 << REGNO (dest);
4779 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4780 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4781 output_asm_insn ("call\t%X2", xops);
4784 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4785 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4786 else if (!TARGET_MACHO)
4787 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4792 /* Generate an "push" pattern for input ARG. */
4798 return gen_rtx_SET (VOIDmode,
4800 gen_rtx_PRE_DEC (Pmode,
4801 stack_pointer_rtx)),
4805 /* Return >= 0 if there is an unused call-clobbered register available
4806 for the entire function. */
4809 ix86_select_alt_pic_regnum ()
4811 if (current_function_is_leaf && !current_function_profile)
4814 for (i = 2; i >= 0; --i)
4815 if (!regs_ever_live[i])
4819 return INVALID_REGNUM;
4822 /* Return 1 if we need to save REGNO. */
4824 ix86_save_reg (regno, maybe_eh_return)
4826 int maybe_eh_return;
4828 if (pic_offset_table_rtx
4829 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4830 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4831 || current_function_profile
4832 || current_function_calls_eh_return
4833 || current_function_uses_const_pool))
4835 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4840 if (current_function_calls_eh_return && maybe_eh_return)
4845 unsigned test = EH_RETURN_DATA_REGNO (i);
4846 if (test == INVALID_REGNUM)
4853 return (regs_ever_live[regno]
4854 && !call_used_regs[regno]
4855 && !fixed_regs[regno]
4856 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4859 /* Return number of registers to be saved on the stack. */
4867 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4868 if (ix86_save_reg (regno, true))
4873 /* Return the offset between two registers, one to be eliminated, and the other
4874 its replacement, at the start of a routine. */
4877 ix86_initial_elimination_offset (from, to)
4881 struct ix86_frame frame;
4882 ix86_compute_frame_layout (&frame);
4884 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4885 return frame.hard_frame_pointer_offset;
4886 else if (from == FRAME_POINTER_REGNUM
4887 && to == HARD_FRAME_POINTER_REGNUM)
4888 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4891 if (to != STACK_POINTER_REGNUM)
4893 else if (from == ARG_POINTER_REGNUM)
4894 return frame.stack_pointer_offset;
4895 else if (from != FRAME_POINTER_REGNUM)
4898 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4902 /* Fill structure ix86_frame about frame of currently computed function. */
4905 ix86_compute_frame_layout (frame)
4906 struct ix86_frame *frame;
4908 HOST_WIDE_INT total_size;
4909 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4911 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4912 HOST_WIDE_INT size = get_frame_size ();
4914 frame->nregs = ix86_nsaved_regs ();
4917 /* During reload iteration the amount of registers saved can change.
4918 Recompute the value as needed. Do not recompute when amount of registers
4919 didn't change as reload does mutiple calls to the function and does not
4920 expect the decision to change within single iteration. */
4922 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4924 int count = frame->nregs;
4926 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4927 /* The fast prologue uses move instead of push to save registers. This
4928 is significantly longer, but also executes faster as modern hardware
4929 can execute the moves in parallel, but can't do that for push/pop.
4931 Be careful about choosing what prologue to emit: When function takes
4932 many instructions to execute we may use slow version as well as in
4933 case function is known to be outside hot spot (this is known with
4934 feedback only). Weight the size of function by number of registers
4935 to save as it is cheap to use one or two push instructions but very
4936 slow to use many of them. */
4938 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4939 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4940 || (flag_branch_probabilities
4941 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4942 cfun->machine->use_fast_prologue_epilogue = false;
4944 cfun->machine->use_fast_prologue_epilogue
4945 = !expensive_function_p (count);
4947 if (TARGET_PROLOGUE_USING_MOVE
4948 && cfun->machine->use_fast_prologue_epilogue)
4949 frame->save_regs_using_mov = true;
4951 frame->save_regs_using_mov = false;
4954 /* Skip return address and saved base pointer. */
4955 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4957 frame->hard_frame_pointer_offset = offset;
4959 /* Do some sanity checking of stack_alignment_needed and
4960 preferred_alignment, since i386 port is the only using those features
4961 that may break easily. */
4963 if (size && !stack_alignment_needed)
4965 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4967 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4969 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4972 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4973 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4975 /* Register save area */
4976 offset += frame->nregs * UNITS_PER_WORD;
4979 if (ix86_save_varrargs_registers)
4981 offset += X86_64_VARARGS_SIZE;
4982 frame->va_arg_size = X86_64_VARARGS_SIZE;
4985 frame->va_arg_size = 0;
4987 /* Align start of frame for local function. */
4988 frame->padding1 = ((offset + stack_alignment_needed - 1)
4989 & -stack_alignment_needed) - offset;
4991 offset += frame->padding1;
4993 /* Frame pointer points here. */
4994 frame->frame_pointer_offset = offset;
4998 /* Add outgoing arguments area. Can be skipped if we eliminated
4999 all the function calls as dead code. */
5000 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
5002 offset += current_function_outgoing_args_size;
5003 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5006 frame->outgoing_arguments_size = 0;
5008 /* Align stack boundary. Only needed if we're calling another function
5010 if (!current_function_is_leaf || current_function_calls_alloca)
5011 frame->padding2 = ((offset + preferred_alignment - 1)
5012 & -preferred_alignment) - offset;
5014 frame->padding2 = 0;
5016 offset += frame->padding2;
5018 /* We've reached end of stack frame. */
5019 frame->stack_pointer_offset = offset;
5021 /* Size prologue needs to allocate. */
5022 frame->to_allocate =
5023 (size + frame->padding1 + frame->padding2
5024 + frame->outgoing_arguments_size + frame->va_arg_size);
5026 if (!frame->to_allocate && frame->nregs <= 1)
5027 frame->save_regs_using_mov = false;
5029 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
5030 && current_function_is_leaf)
5032 frame->red_zone_size = frame->to_allocate;
5033 if (frame->save_regs_using_mov)
5034 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5035 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5036 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5039 frame->red_zone_size = 0;
5040 frame->to_allocate -= frame->red_zone_size;
5041 frame->stack_pointer_offset -= frame->red_zone_size;
5043 fprintf (stderr, "nregs: %i\n", frame->nregs);
5044 fprintf (stderr, "size: %i\n", size);
5045 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5046 fprintf (stderr, "padding1: %i\n", frame->padding1);
5047 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5048 fprintf (stderr, "padding2: %i\n", frame->padding2);
5049 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5050 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5051 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5052 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5053 frame->hard_frame_pointer_offset);
5054 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5058 /* Emit code to save registers in the prologue. */
5061 ix86_emit_save_regs ()
5066 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5067 if (ix86_save_reg (regno, true))
5069 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5070 RTX_FRAME_RELATED_P (insn) = 1;
5074 /* Emit code to save registers using MOV insns. First register
5075 is restored from POINTER + OFFSET. */
5077 ix86_emit_save_regs_using_mov (pointer, offset)
5079 HOST_WIDE_INT offset;
5084 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5085 if (ix86_save_reg (regno, true))
5087 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5089 gen_rtx_REG (Pmode, regno));
5090 RTX_FRAME_RELATED_P (insn) = 1;
5091 offset += UNITS_PER_WORD;
5095 /* Expand the prologue into a bunch of separate insns. */
5098 ix86_expand_prologue ()
5102 struct ix86_frame frame;
5103 HOST_WIDE_INT allocate;
5105 ix86_compute_frame_layout (&frame);
5107 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5108 slower on all targets. Also sdb doesn't like it. */
5110 if (frame_pointer_needed)
5112 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5113 RTX_FRAME_RELATED_P (insn) = 1;
5115 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5116 RTX_FRAME_RELATED_P (insn) = 1;
5119 allocate = frame.to_allocate;
5121 if (!frame.save_regs_using_mov)
5122 ix86_emit_save_regs ();
5124 allocate += frame.nregs * UNITS_PER_WORD;
5126 /* When using red zone we may start register saving before allocating
5127 the stack frame saving one cycle of the prologue. */
5128 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5129 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5130 : stack_pointer_rtx,
5131 -frame.nregs * UNITS_PER_WORD);
5135 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5137 insn = emit_insn (gen_pro_epilogue_adjust_stack
5138 (stack_pointer_rtx, stack_pointer_rtx,
5139 GEN_INT (-allocate)));
5140 RTX_FRAME_RELATED_P (insn) = 1;
5144 /* ??? Is this only valid for Win32? */
5151 arg0 = gen_rtx_REG (SImode, 0);
5152 emit_move_insn (arg0, GEN_INT (allocate));
5154 sym = gen_rtx_MEM (FUNCTION_MODE,
5155 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5156 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5158 CALL_INSN_FUNCTION_USAGE (insn)
5159 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5160 CALL_INSN_FUNCTION_USAGE (insn));
5162 /* Don't allow scheduling pass to move insns across __alloca
5164 emit_insn (gen_blockage (const0_rtx));
5166 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5168 if (!frame_pointer_needed || !frame.to_allocate)
5169 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5171 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5172 -frame.nregs * UNITS_PER_WORD);
5175 #ifdef SUBTARGET_PROLOGUE
5179 pic_reg_used = false;
5180 if (pic_offset_table_rtx
5181 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5182 || current_function_profile))
5184 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5186 if (alt_pic_reg_used != INVALID_REGNUM)
5187 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5189 pic_reg_used = true;
5194 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5196 /* Even with accurate pre-reload life analysis, we can wind up
5197 deleting all references to the pic register after reload.
5198 Consider if cross-jumping unifies two sides of a branch
5199 controlled by a comparison vs the only read from a global.
5200 In which case, allow the set_got to be deleted, though we're
5201 too late to do anything about the ebx save in the prologue. */
5202 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5205 /* Prevent function calls from be scheduled before the call to mcount.
5206 In the pic_reg_used case, make sure that the got load isn't deleted. */
5207 if (current_function_profile)
5208 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5211 /* Emit code to restore saved registers using MOV insns. First register
5212 is restored from POINTER + OFFSET. */
5214 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5217 int maybe_eh_return;
5221 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5222 if (ix86_save_reg (regno, maybe_eh_return))
5224 emit_move_insn (gen_rtx_REG (Pmode, regno),
5225 adjust_address (gen_rtx_MEM (Pmode, pointer),
5227 offset += UNITS_PER_WORD;
5231 /* Restore function stack, frame, and registers. */
5234 ix86_expand_epilogue (style)
5238 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5239 struct ix86_frame frame;
5240 HOST_WIDE_INT offset;
5242 ix86_compute_frame_layout (&frame);
5244 /* Calculate start of saved registers relative to ebp. Special care
5245 must be taken for the normal return case of a function using
5246 eh_return: the eax and edx registers are marked as saved, but not
5247 restored along this path. */
5248 offset = frame.nregs;
5249 if (current_function_calls_eh_return && style != 2)
5251 offset *= -UNITS_PER_WORD;
5253 /* If we're only restoring one register and sp is not valid then
5254 using a move instruction to restore the register since it's
5255 less work than reloading sp and popping the register.
5257 The default code result in stack adjustment using add/lea instruction,
5258 while this code results in LEAVE instruction (or discrete equivalent),
5259 so it is profitable in some other cases as well. Especially when there
5260 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5261 and there is exactly one register to pop. This heuristic may need some
5262 tuning in future. */
5263 if ((!sp_valid && frame.nregs <= 1)
5264 || (TARGET_EPILOGUE_USING_MOVE
5265 && cfun->machine->use_fast_prologue_epilogue
5266 && (frame.nregs > 1 || frame.to_allocate))
5267 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5268 || (frame_pointer_needed && TARGET_USE_LEAVE
5269 && cfun->machine->use_fast_prologue_epilogue
5270 && frame.nregs == 1)
5271 || current_function_calls_eh_return)
5273 /* Restore registers. We can use ebp or esp to address the memory
5274 locations. If both are available, default to ebp, since offsets
5275 are known to be small. Only exception is esp pointing directly to the
5276 end of block of saved registers, where we may simplify addressing
5279 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5280 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5281 frame.to_allocate, style == 2);
5283 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5284 offset, style == 2);
5286 /* eh_return epilogues need %ecx added to the stack pointer. */
5289 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5291 if (frame_pointer_needed)
5293 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5294 tmp = plus_constant (tmp, UNITS_PER_WORD);
5295 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5297 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5298 emit_move_insn (hard_frame_pointer_rtx, tmp);
5300 emit_insn (gen_pro_epilogue_adjust_stack
5301 (stack_pointer_rtx, sa, const0_rtx));
5305 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5306 tmp = plus_constant (tmp, (frame.to_allocate
5307 + frame.nregs * UNITS_PER_WORD));
5308 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5311 else if (!frame_pointer_needed)
5312 emit_insn (gen_pro_epilogue_adjust_stack
5313 (stack_pointer_rtx, stack_pointer_rtx,
5314 GEN_INT (frame.to_allocate
5315 + frame.nregs * UNITS_PER_WORD)));
5316 /* If not an i386, mov & pop is faster than "leave". */
5317 else if (TARGET_USE_LEAVE || optimize_size
5318 || !cfun->machine->use_fast_prologue_epilogue)
5319 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5322 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5323 hard_frame_pointer_rtx,
5326 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5328 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5333 /* First step is to deallocate the stack frame so that we can
5334 pop the registers. */
5337 if (!frame_pointer_needed)
5339 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5340 hard_frame_pointer_rtx,
5343 else if (frame.to_allocate)
5344 emit_insn (gen_pro_epilogue_adjust_stack
5345 (stack_pointer_rtx, stack_pointer_rtx,
5346 GEN_INT (frame.to_allocate)));
5348 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5349 if (ix86_save_reg (regno, false))
5352 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5354 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5356 if (frame_pointer_needed)
5358 /* Leave results in shorter dependency chains on CPUs that are
5359 able to grok it fast. */
5360 if (TARGET_USE_LEAVE)
5361 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5362 else if (TARGET_64BIT)
5363 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5365 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5369 /* Sibcall epilogues don't want a return instruction. */
5373 if (current_function_pops_args && current_function_args_size)
5375 rtx popc = GEN_INT (current_function_pops_args);
5377 /* i386 can only pop 64K bytes. If asked to pop more, pop
5378 return address, do explicit add, and jump indirectly to the
5381 if (current_function_pops_args >= 65536)
5383 rtx ecx = gen_rtx_REG (SImode, 2);
5385 /* There are is no "pascal" calling convention in 64bit ABI. */
5389 emit_insn (gen_popsi1 (ecx));
5390 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5391 emit_jump_insn (gen_return_indirect_internal (ecx));
5394 emit_jump_insn (gen_return_pop_internal (popc));
5397 emit_jump_insn (gen_return_internal ());
5400 /* Reset from the function's potential modifications. */
5403 ix86_output_function_epilogue (file, size)
5404 FILE *file ATTRIBUTE_UNUSED;
5405 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5407 if (pic_offset_table_rtx)
5408 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5411 /* Extract the parts of an RTL expression that is a valid memory address
5412 for an instruction. Return 0 if the structure of the address is
5413 grossly off. Return -1 if the address contains ASHIFT, so it is not
5414 strictly valid, but still used for computing length of lea instruction.
5418 ix86_decompose_address (addr, out)
5420 struct ix86_address *out;
5422 rtx base = NULL_RTX;
5423 rtx index = NULL_RTX;
5424 rtx disp = NULL_RTX;
5425 HOST_WIDE_INT scale = 1;
5426 rtx scale_rtx = NULL_RTX;
5429 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5431 else if (GET_CODE (addr) == PLUS)
5433 rtx op0 = XEXP (addr, 0);
5434 rtx op1 = XEXP (addr, 1);
5435 enum rtx_code code0 = GET_CODE (op0);
5436 enum rtx_code code1 = GET_CODE (op1);
5438 if (code0 == REG || code0 == SUBREG)
5440 if (code1 == REG || code1 == SUBREG)
5441 index = op0, base = op1; /* index + base */
5443 base = op0, disp = op1; /* base + displacement */
5445 else if (code0 == MULT)
5447 index = XEXP (op0, 0);
5448 scale_rtx = XEXP (op0, 1);
5449 if (code1 == REG || code1 == SUBREG)
5450 base = op1; /* index*scale + base */
5452 disp = op1; /* index*scale + disp */
5454 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5456 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5457 scale_rtx = XEXP (XEXP (op0, 0), 1);
5458 base = XEXP (op0, 1);
5461 else if (code0 == PLUS)
5463 index = XEXP (op0, 0); /* index + base + disp */
5464 base = XEXP (op0, 1);
5470 else if (GET_CODE (addr) == MULT)
5472 index = XEXP (addr, 0); /* index*scale */
5473 scale_rtx = XEXP (addr, 1);
5475 else if (GET_CODE (addr) == ASHIFT)
5479 /* We're called for lea too, which implements ashift on occasion. */
5480 index = XEXP (addr, 0);
5481 tmp = XEXP (addr, 1);
5482 if (GET_CODE (tmp) != CONST_INT)
5484 scale = INTVAL (tmp);
5485 if ((unsigned HOST_WIDE_INT) scale > 3)
5491 disp = addr; /* displacement */
5493 /* Extract the integral value of scale. */
5496 if (GET_CODE (scale_rtx) != CONST_INT)
5498 scale = INTVAL (scale_rtx);
5501 /* Allow arg pointer and stack pointer as index if there is not scaling */
5502 if (base && index && scale == 1
5503 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5504 || index == stack_pointer_rtx))
5511 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5512 if ((base == hard_frame_pointer_rtx
5513 || base == frame_pointer_rtx
5514 || base == arg_pointer_rtx) && !disp)
5517 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5518 Avoid this by transforming to [%esi+0]. */
5519 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5520 && base && !index && !disp
5522 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5525 /* Special case: encode reg+reg instead of reg*2. */
5526 if (!base && index && scale && scale == 2)
5527 base = index, scale = 1;
5529 /* Special case: scaling cannot be encoded without base or displacement. */
5530 if (!base && !disp && index && scale != 1)
5541 /* Return cost of the memory address x.
5542 For i386, it is better to use a complex address than let gcc copy
5543 the address into a reg and make a new pseudo. But not if the address
5544 requires to two regs - that would mean more pseudos with longer
5547 ix86_address_cost (x)
5550 struct ix86_address parts;
5553 if (!ix86_decompose_address (x, &parts))
5556 if (parts.base && GET_CODE (parts.base) == SUBREG)
5557 parts.base = SUBREG_REG (parts.base);
5558 if (parts.index && GET_CODE (parts.index) == SUBREG)
5559 parts.index = SUBREG_REG (parts.index);
5561 /* More complex memory references are better. */
5562 if (parts.disp && parts.disp != const0_rtx)
5565 /* Attempt to minimize number of registers in the address. */
5567 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5569 && (!REG_P (parts.index)
5570 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5574 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5576 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5577 && parts.base != parts.index)
5580 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5581 since it's predecode logic can't detect the length of instructions
5582 and it degenerates to vector decoded. Increase cost of such
5583 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5584 to split such addresses or even refuse such addresses at all.
5586 Following addressing modes are affected:
5591 The first and last case may be avoidable by explicitly coding the zero in
5592 memory address, but I don't have AMD-K6 machine handy to check this
5596 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5597 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5598 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5604 /* If X is a machine specific address (i.e. a symbol or label being
5605 referenced as a displacement from the GOT implemented using an
5606 UNSPEC), then return the base term. Otherwise return X. */
5609 ix86_find_base_term (x)
5616 if (GET_CODE (x) != CONST)
5619 if (GET_CODE (term) == PLUS
5620 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5621 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5622 term = XEXP (term, 0);
5623 if (GET_CODE (term) != UNSPEC
5624 || XINT (term, 1) != UNSPEC_GOTPCREL)
5627 term = XVECEXP (term, 0, 0);
5629 if (GET_CODE (term) != SYMBOL_REF
5630 && GET_CODE (term) != LABEL_REF)
5636 term = ix86_delegitimize_address (x);
5638 if (GET_CODE (term) != SYMBOL_REF
5639 && GET_CODE (term) != LABEL_REF)
5645 /* Determine if a given RTX is a valid constant. We already know this
5646 satisfies CONSTANT_P. */
5649 legitimate_constant_p (x)
5654 switch (GET_CODE (x))
5657 /* TLS symbols are not constant. */
5658 if (tls_symbolic_operand (x, Pmode))
5663 inner = XEXP (x, 0);
5665 /* Offsets of TLS symbols are never valid.
5666 Discourage CSE from creating them. */
5667 if (GET_CODE (inner) == PLUS
5668 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5671 /* Only some unspecs are valid as "constants". */
5672 if (GET_CODE (inner) == UNSPEC)
5673 switch (XINT (inner, 1))
5676 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5686 /* Otherwise we handle everything else in the move patterns. */
5690 /* Determine if it's legal to put X into the constant pool. This
5691 is not possible for the address of thread-local symbols, which
5692 is checked above. */
5695 ix86_cannot_force_const_mem (x)
5698 return !legitimate_constant_p (x);
5701 /* Determine if a given RTX is a valid constant address. */
5704 constant_address_p (x)
5707 switch (GET_CODE (x))
5714 return TARGET_64BIT;
5717 /* For Mach-O, really believe the CONST. */
5720 /* Otherwise fall through. */
5722 return !flag_pic && legitimate_constant_p (x);
5729 /* Nonzero if the constant value X is a legitimate general operand
5730 when generating PIC code. It is given that flag_pic is on and
5731 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5734 legitimate_pic_operand_p (x)
5739 switch (GET_CODE (x))
5742 inner = XEXP (x, 0);
5744 /* Only some unspecs are valid as "constants". */
5745 if (GET_CODE (inner) == UNSPEC)
5746 switch (XINT (inner, 1))
5749 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5757 return legitimate_pic_address_disp_p (x);
5764 /* Determine if a given CONST RTX is a valid memory displacement
5768 legitimate_pic_address_disp_p (disp)
5773 /* In 64bit mode we can allow direct addresses of symbols and labels
5774 when they are not dynamic symbols. */
5777 /* TLS references should always be enclosed in UNSPEC. */
5778 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5780 if (GET_CODE (disp) == SYMBOL_REF
5781 && ix86_cmodel == CM_SMALL_PIC
5782 && (CONSTANT_POOL_ADDRESS_P (disp)
5783 || SYMBOL_REF_FLAG (disp)))
5785 if (GET_CODE (disp) == LABEL_REF)
5787 if (GET_CODE (disp) == CONST
5788 && GET_CODE (XEXP (disp, 0)) == PLUS
5789 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5790 && ix86_cmodel == CM_SMALL_PIC
5791 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5792 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5793 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5794 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5795 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5796 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5799 if (GET_CODE (disp) != CONST)
5801 disp = XEXP (disp, 0);
5805 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5806 of GOT tables. We should not need these anyway. */
5807 if (GET_CODE (disp) != UNSPEC
5808 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5811 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5812 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5818 if (GET_CODE (disp) == PLUS)
5820 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5822 disp = XEXP (disp, 0);
5826 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5827 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5829 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5830 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5831 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5833 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5834 if (strstr (sym_name, "$pb") != 0)
5839 if (GET_CODE (disp) != UNSPEC)
5842 switch (XINT (disp, 1))
5847 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5849 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5850 case UNSPEC_GOTTPOFF:
5851 case UNSPEC_GOTNTPOFF:
5852 case UNSPEC_INDNTPOFF:
5855 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5857 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5859 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5865 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5866 memory address for an instruction. The MODE argument is the machine mode
5867 for the MEM expression that wants to use this address.
5869 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5870 convert common non-canonical forms to canonical form so that they will
5874 legitimate_address_p (mode, addr, strict)
5875 enum machine_mode mode;
5879 struct ix86_address parts;
5880 rtx base, index, disp;
5881 HOST_WIDE_INT scale;
5882 const char *reason = NULL;
5883 rtx reason_rtx = NULL_RTX;
5885 if (TARGET_DEBUG_ADDR)
5888 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5889 GET_MODE_NAME (mode), strict);
5893 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5895 if (TARGET_DEBUG_ADDR)
5896 fprintf (stderr, "Success.\n");
5900 if (ix86_decompose_address (addr, &parts) <= 0)
5902 reason = "decomposition failed";
5907 index = parts.index;
5909 scale = parts.scale;
5911 /* Validate base register.
5913 Don't allow SUBREG's here, it can lead to spill failures when the base
5914 is one word out of a two word structure, which is represented internally
5922 if (GET_CODE (base) == SUBREG)
5923 reg = SUBREG_REG (base);
5927 if (GET_CODE (reg) != REG)
5929 reason = "base is not a register";
5933 if (GET_MODE (base) != Pmode)
5935 reason = "base is not in Pmode";
5939 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5940 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5942 reason = "base is not valid";
5947 /* Validate index register.
5949 Don't allow SUBREG's here, it can lead to spill failures when the index
5950 is one word out of a two word structure, which is represented internally
5958 if (GET_CODE (index) == SUBREG)
5959 reg = SUBREG_REG (index);
5963 if (GET_CODE (reg) != REG)
5965 reason = "index is not a register";
5969 if (GET_MODE (index) != Pmode)
5971 reason = "index is not in Pmode";
5975 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5976 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5978 reason = "index is not valid";
5983 /* Validate scale factor. */
5986 reason_rtx = GEN_INT (scale);
5989 reason = "scale without index";
5993 if (scale != 2 && scale != 4 && scale != 8)
5995 reason = "scale is not a valid multiplier";
6000 /* Validate displacement. */
6005 if (GET_CODE (disp) == CONST
6006 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6007 switch (XINT (XEXP (disp, 0), 1))
6011 case UNSPEC_GOTPCREL:
6014 goto is_legitimate_pic;
6016 case UNSPEC_GOTTPOFF:
6017 case UNSPEC_GOTNTPOFF:
6018 case UNSPEC_INDNTPOFF:
6024 reason = "invalid address unspec";
6028 else if (flag_pic && (SYMBOLIC_CONST (disp)
6030 && !machopic_operand_p (disp)
6035 if (TARGET_64BIT && (index || base))
6037 /* foo@dtpoff(%rX) is ok. */
6038 if (GET_CODE (disp) != CONST
6039 || GET_CODE (XEXP (disp, 0)) != PLUS
6040 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6041 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6042 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6043 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6045 reason = "non-constant pic memory reference";
6049 else if (! legitimate_pic_address_disp_p (disp))
6051 reason = "displacement is an invalid pic construct";
6055 /* This code used to verify that a symbolic pic displacement
6056 includes the pic_offset_table_rtx register.
6058 While this is good idea, unfortunately these constructs may
6059 be created by "adds using lea" optimization for incorrect
6068 This code is nonsensical, but results in addressing
6069 GOT table with pic_offset_table_rtx base. We can't
6070 just refuse it easily, since it gets matched by
6071 "addsi3" pattern, that later gets split to lea in the
6072 case output register differs from input. While this
6073 can be handled by separate addsi pattern for this case
6074 that never results in lea, this seems to be easier and
6075 correct fix for crash to disable this test. */
6077 else if (!CONSTANT_ADDRESS_P (disp))
6079 reason = "displacement is not constant";
6082 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6084 reason = "displacement is out of range";
6087 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
6089 reason = "displacement is a const_double";
6094 /* Everything looks valid. */
6095 if (TARGET_DEBUG_ADDR)
6096 fprintf (stderr, "Success.\n");
6100 if (TARGET_DEBUG_ADDR)
6102 fprintf (stderr, "Error: %s\n", reason);
6103 debug_rtx (reason_rtx);
6108 /* Return an unique alias set for the GOT. */
6110 static HOST_WIDE_INT
6111 ix86_GOT_alias_set ()
6113 static HOST_WIDE_INT set = -1;
6115 set = new_alias_set ();
6119 /* Return a legitimate reference for ORIG (an address) using the
6120 register REG. If REG is 0, a new pseudo is generated.
6122 There are two types of references that must be handled:
6124 1. Global data references must load the address from the GOT, via
6125 the PIC reg. An insn is emitted to do this load, and the reg is
6128 2. Static data references, constant pool addresses, and code labels
6129 compute the address as an offset from the GOT, whose base is in
6130 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
6131 differentiate them from global data objects. The returned
6132 address is the PIC reg + an unspec constant.
6134 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6135 reg also appears in the address. */
6138 legitimize_pic_address (orig, reg)
6148 reg = gen_reg_rtx (Pmode);
6149 /* Use the generic Mach-O PIC machinery. */
6150 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6153 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6155 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6157 /* This symbol may be referenced via a displacement from the PIC
6158 base address (@GOTOFF). */
6160 if (reload_in_progress)
6161 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6162 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6163 new = gen_rtx_CONST (Pmode, new);
6164 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6168 emit_move_insn (reg, new);
6172 else if (GET_CODE (addr) == SYMBOL_REF)
6176 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6177 new = gen_rtx_CONST (Pmode, new);
6178 new = gen_rtx_MEM (Pmode, new);
6179 RTX_UNCHANGING_P (new) = 1;
6180 set_mem_alias_set (new, ix86_GOT_alias_set ());
6183 reg = gen_reg_rtx (Pmode);
6184 /* Use directly gen_movsi, otherwise the address is loaded
6185 into register for CSE. We don't want to CSE this addresses,
6186 instead we CSE addresses from the GOT table, so skip this. */
6187 emit_insn (gen_movsi (reg, new));
6192 /* This symbol must be referenced via a load from the
6193 Global Offset Table (@GOT). */
6195 if (reload_in_progress)
6196 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6197 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6198 new = gen_rtx_CONST (Pmode, new);
6199 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6200 new = gen_rtx_MEM (Pmode, new);
6201 RTX_UNCHANGING_P (new) = 1;
6202 set_mem_alias_set (new, ix86_GOT_alias_set ());
6205 reg = gen_reg_rtx (Pmode);
6206 emit_move_insn (reg, new);
6212 if (GET_CODE (addr) == CONST)
6214 addr = XEXP (addr, 0);
6216 /* We must match stuff we generate before. Assume the only
6217 unspecs that can get here are ours. Not that we could do
6218 anything with them anyway... */
6219 if (GET_CODE (addr) == UNSPEC
6220 || (GET_CODE (addr) == PLUS
6221 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6223 if (GET_CODE (addr) != PLUS)
6226 if (GET_CODE (addr) == PLUS)
6228 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6230 /* Check first to see if this is a constant offset from a @GOTOFF
6231 symbol reference. */
6232 if (local_symbolic_operand (op0, Pmode)
6233 && GET_CODE (op1) == CONST_INT)
6237 if (reload_in_progress)
6238 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6239 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6241 new = gen_rtx_PLUS (Pmode, new, op1);
6242 new = gen_rtx_CONST (Pmode, new);
6243 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6247 emit_move_insn (reg, new);
6253 if (INTVAL (op1) < -16*1024*1024
6254 || INTVAL (op1) >= 16*1024*1024)
6255 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6260 base = legitimize_pic_address (XEXP (addr, 0), reg);
6261 new = legitimize_pic_address (XEXP (addr, 1),
6262 base == reg ? NULL_RTX : reg);
6264 if (GET_CODE (new) == CONST_INT)
6265 new = plus_constant (base, INTVAL (new));
6268 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6270 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6271 new = XEXP (new, 1);
6273 new = gen_rtx_PLUS (Pmode, base, new);
6282 ix86_encode_section_info (decl, first)
6284 int first ATTRIBUTE_UNUSED;
6286 bool local_p = (*targetm.binds_local_p) (decl);
6289 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6290 if (GET_CODE (rtl) != MEM)
6292 symbol = XEXP (rtl, 0);
6293 if (GET_CODE (symbol) != SYMBOL_REF)
6296 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6297 symbol so that we may access it directly in the GOT. */
6300 SYMBOL_REF_FLAG (symbol) = local_p;
6302 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6303 "local dynamic", "initial exec" or "local exec" TLS models
6306 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6308 const char *symbol_str;
6311 enum tls_model kind = decl_tls_model (decl);
6313 if (TARGET_64BIT && ! flag_pic)
6315 /* x86-64 doesn't allow non-pic code for shared libraries,
6316 so don't generate GD/LD TLS models for non-pic code. */
6319 case TLS_MODEL_GLOBAL_DYNAMIC:
6320 kind = TLS_MODEL_INITIAL_EXEC; break;
6321 case TLS_MODEL_LOCAL_DYNAMIC:
6322 kind = TLS_MODEL_LOCAL_EXEC; break;
6328 symbol_str = XSTR (symbol, 0);
6330 if (symbol_str[0] == '%')
6332 if (symbol_str[1] == tls_model_chars[kind])
6336 len = strlen (symbol_str) + 1;
6337 newstr = alloca (len + 2);
6340 newstr[1] = tls_model_chars[kind];
6341 memcpy (newstr + 2, symbol_str, len);
6343 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6347 /* Undo the above when printing symbol names. */
6350 ix86_strip_name_encoding (str)
6360 /* Load the thread pointer into a register. */
6363 get_thread_pointer ()
6367 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6368 tp = gen_rtx_MEM (Pmode, tp);
6369 RTX_UNCHANGING_P (tp) = 1;
6370 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6371 tp = force_reg (Pmode, tp);
6376 /* Try machine-dependent ways of modifying an illegitimate address
6377 to be legitimate. If we find one, return the new, valid address.
6378 This macro is used in only one place: `memory_address' in explow.c.
6380 OLDX is the address as it was before break_out_memory_refs was called.
6381 In some cases it is useful to look at this to decide what needs to be done.
6383 MODE and WIN are passed so that this macro can use
6384 GO_IF_LEGITIMATE_ADDRESS.
6386 It is always safe for this macro to do nothing. It exists to recognize
6387 opportunities to optimize the output.
6389 For the 80386, we handle X+REG by loading X into a register R and
6390 using R+REG. R will go in a general reg and indexing will be used.
6391 However, if REG is a broken-out memory address or multiplication,
6392 nothing needs to be done because REG can certainly go in a general reg.
6394 When -fpic is used, special handling is needed for symbolic references.
6395 See comments by legitimize_pic_address in i386.c for details. */
6398 legitimize_address (x, oldx, mode)
6400 register rtx oldx ATTRIBUTE_UNUSED;
6401 enum machine_mode mode;
6406 if (TARGET_DEBUG_ADDR)
6408 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6409 GET_MODE_NAME (mode));
6413 log = tls_symbolic_operand (x, mode);
6416 rtx dest, base, off, pic;
6421 case TLS_MODEL_GLOBAL_DYNAMIC:
6422 dest = gen_reg_rtx (Pmode);
6425 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6428 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6429 insns = get_insns ();
6432 emit_libcall_block (insns, dest, rax, x);
6435 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6438 case TLS_MODEL_LOCAL_DYNAMIC:
6439 base = gen_reg_rtx (Pmode);
6442 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6445 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6446 insns = get_insns ();
6449 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6450 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6451 emit_libcall_block (insns, base, rax, note);
6454 emit_insn (gen_tls_local_dynamic_base_32 (base));
6456 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6457 off = gen_rtx_CONST (Pmode, off);
6459 return gen_rtx_PLUS (Pmode, base, off);
6461 case TLS_MODEL_INITIAL_EXEC:
6465 type = UNSPEC_GOTNTPOFF;
6469 if (reload_in_progress)
6470 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6471 pic = pic_offset_table_rtx;
6472 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6474 else if (!TARGET_GNU_TLS)
6476 pic = gen_reg_rtx (Pmode);
6477 emit_insn (gen_set_got (pic));
6478 type = UNSPEC_GOTTPOFF;
6483 type = UNSPEC_INDNTPOFF;
6486 base = get_thread_pointer ();
6488 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6489 off = gen_rtx_CONST (Pmode, off);
6491 off = gen_rtx_PLUS (Pmode, pic, off);
6492 off = gen_rtx_MEM (Pmode, off);
6493 RTX_UNCHANGING_P (off) = 1;
6494 set_mem_alias_set (off, ix86_GOT_alias_set ());
6495 dest = gen_reg_rtx (Pmode);
6497 if (TARGET_64BIT || TARGET_GNU_TLS)
6499 emit_move_insn (dest, off);
6500 return gen_rtx_PLUS (Pmode, base, dest);
6503 emit_insn (gen_subsi3 (dest, base, off));
6506 case TLS_MODEL_LOCAL_EXEC:
6507 base = get_thread_pointer ();
6509 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6510 (TARGET_64BIT || TARGET_GNU_TLS)
6511 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6512 off = gen_rtx_CONST (Pmode, off);
6514 if (TARGET_64BIT || TARGET_GNU_TLS)
6515 return gen_rtx_PLUS (Pmode, base, off);
6518 dest = gen_reg_rtx (Pmode);
6519 emit_insn (gen_subsi3 (dest, base, off));
6530 if (flag_pic && SYMBOLIC_CONST (x))
6531 return legitimize_pic_address (x, 0);
6533 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6534 if (GET_CODE (x) == ASHIFT
6535 && GET_CODE (XEXP (x, 1)) == CONST_INT
6536 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6539 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6540 GEN_INT (1 << log));
6543 if (GET_CODE (x) == PLUS)
6545 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6547 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6548 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6549 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6552 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6553 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6554 GEN_INT (1 << log));
6557 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6558 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6559 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6562 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6563 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6564 GEN_INT (1 << log));
6567 /* Put multiply first if it isn't already. */
6568 if (GET_CODE (XEXP (x, 1)) == MULT)
6570 rtx tmp = XEXP (x, 0);
6571 XEXP (x, 0) = XEXP (x, 1);
6576 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6577 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6578 created by virtual register instantiation, register elimination, and
6579 similar optimizations. */
6580 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6583 x = gen_rtx_PLUS (Pmode,
6584 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6585 XEXP (XEXP (x, 1), 0)),
6586 XEXP (XEXP (x, 1), 1));
6590 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6591 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6592 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6593 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6594 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6595 && CONSTANT_P (XEXP (x, 1)))
6598 rtx other = NULL_RTX;
6600 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6602 constant = XEXP (x, 1);
6603 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6605 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6607 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6608 other = XEXP (x, 1);
6616 x = gen_rtx_PLUS (Pmode,
6617 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6618 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6619 plus_constant (other, INTVAL (constant)));
6623 if (changed && legitimate_address_p (mode, x, FALSE))
6626 if (GET_CODE (XEXP (x, 0)) == MULT)
6629 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6632 if (GET_CODE (XEXP (x, 1)) == MULT)
6635 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6639 && GET_CODE (XEXP (x, 1)) == REG
6640 && GET_CODE (XEXP (x, 0)) == REG)
6643 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6646 x = legitimize_pic_address (x, 0);
6649 if (changed && legitimate_address_p (mode, x, FALSE))
6652 if (GET_CODE (XEXP (x, 0)) == REG)
6654 register rtx temp = gen_reg_rtx (Pmode);
6655 register rtx val = force_operand (XEXP (x, 1), temp);
6657 emit_move_insn (temp, val);
6663 else if (GET_CODE (XEXP (x, 1)) == REG)
6665 register rtx temp = gen_reg_rtx (Pmode);
6666 register rtx val = force_operand (XEXP (x, 0), temp);
6668 emit_move_insn (temp, val);
6678 /* Print an integer constant expression in assembler syntax. Addition
6679 and subtraction are the only arithmetic that may appear in these
6680 expressions. FILE is the stdio stream to write to, X is the rtx, and
6681 CODE is the operand print code from the output string. */
6684 output_pic_addr_const (file, x, code)
6691 switch (GET_CODE (x))
6701 assemble_name (file, XSTR (x, 0));
6702 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6703 fputs ("@PLT", file);
6710 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6711 assemble_name (asm_out_file, buf);
6715 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6719 /* This used to output parentheses around the expression,
6720 but that does not work on the 386 (either ATT or BSD assembler). */
6721 output_pic_addr_const (file, XEXP (x, 0), code);
6725 if (GET_MODE (x) == VOIDmode)
6727 /* We can use %d if the number is <32 bits and positive. */
6728 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6729 fprintf (file, "0x%lx%08lx",
6730 (unsigned long) CONST_DOUBLE_HIGH (x),
6731 (unsigned long) CONST_DOUBLE_LOW (x));
6733 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6736 /* We can't handle floating point constants;
6737 PRINT_OPERAND must handle them. */
6738 output_operand_lossage ("floating constant misused");
6742 /* Some assemblers need integer constants to appear first. */
6743 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6745 output_pic_addr_const (file, XEXP (x, 0), code);
6747 output_pic_addr_const (file, XEXP (x, 1), code);
6749 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6751 output_pic_addr_const (file, XEXP (x, 1), code);
6753 output_pic_addr_const (file, XEXP (x, 0), code);
6761 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6762 output_pic_addr_const (file, XEXP (x, 0), code);
6764 output_pic_addr_const (file, XEXP (x, 1), code);
6766 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6770 if (XVECLEN (x, 0) != 1)
6772 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6773 switch (XINT (x, 1))
6776 fputs ("@GOT", file);
6779 fputs ("@GOTOFF", file);
6781 case UNSPEC_GOTPCREL:
6782 fputs ("@GOTPCREL(%rip)", file);
6784 case UNSPEC_GOTTPOFF:
6785 /* FIXME: This might be @TPOFF in Sun ld too. */
6786 fputs ("@GOTTPOFF", file);
6789 fputs ("@TPOFF", file);
6793 fputs ("@TPOFF", file);
6795 fputs ("@NTPOFF", file);
6798 fputs ("@DTPOFF", file);
6800 case UNSPEC_GOTNTPOFF:
6802 fputs ("@GOTTPOFF(%rip)", file);
6804 fputs ("@GOTNTPOFF", file);
6806 case UNSPEC_INDNTPOFF:
6807 fputs ("@INDNTPOFF", file);
6810 output_operand_lossage ("invalid UNSPEC as operand");
6816 output_operand_lossage ("invalid expression as operand");
6820 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6821 We need to handle our special PIC relocations. */
6824 i386_dwarf_output_addr_const (file, x)
6829 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6833 fprintf (file, "%s", ASM_LONG);
6836 output_pic_addr_const (file, x, '\0');
6838 output_addr_const (file, x);
6842 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6843 We need to emit DTP-relative relocations. */
6846 i386_output_dwarf_dtprel (file, size, x)
6851 fputs (ASM_LONG, file);
6852 output_addr_const (file, x);
6853 fputs ("@DTPOFF", file);
6859 fputs (", 0", file);
6866 /* In the name of slightly smaller debug output, and to cater to
6867 general assembler losage, recognize PIC+GOTOFF and turn it back
6868 into a direct symbol reference. */
6871 ix86_delegitimize_address (orig_x)
6876 if (GET_CODE (x) == MEM)
6881 if (GET_CODE (x) != CONST
6882 || GET_CODE (XEXP (x, 0)) != UNSPEC
6883 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6884 || GET_CODE (orig_x) != MEM)
6886 return XVECEXP (XEXP (x, 0), 0, 0);
6889 if (GET_CODE (x) != PLUS
6890 || GET_CODE (XEXP (x, 1)) != CONST)
6893 if (GET_CODE (XEXP (x, 0)) == REG
6894 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6895 /* %ebx + GOT/GOTOFF */
6897 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6899 /* %ebx + %reg * scale + GOT/GOTOFF */
6901 if (GET_CODE (XEXP (y, 0)) == REG
6902 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6904 else if (GET_CODE (XEXP (y, 1)) == REG
6905 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6909 if (GET_CODE (y) != REG
6910 && GET_CODE (y) != MULT
6911 && GET_CODE (y) != ASHIFT)
6917 x = XEXP (XEXP (x, 1), 0);
6918 if (GET_CODE (x) == UNSPEC
6919 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6920 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6923 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6924 return XVECEXP (x, 0, 0);
6927 if (GET_CODE (x) == PLUS
6928 && GET_CODE (XEXP (x, 0)) == UNSPEC
6929 && GET_CODE (XEXP (x, 1)) == CONST_INT
6930 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6931 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6932 && GET_CODE (orig_x) != MEM)))
6934 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6936 return gen_rtx_PLUS (Pmode, y, x);
6944 put_condition_code (code, mode, reverse, fp, file)
6946 enum machine_mode mode;
6952 if (mode == CCFPmode || mode == CCFPUmode)
6954 enum rtx_code second_code, bypass_code;
6955 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6956 if (bypass_code != NIL || second_code != NIL)
6958 code = ix86_fp_compare_code_to_integer (code);
6962 code = reverse_condition (code);
6973 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6978 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6979 Those same assemblers have the same but opposite losage on cmov. */
6982 suffix = fp ? "nbe" : "a";
6985 if (mode == CCNOmode || mode == CCGOCmode)
6987 else if (mode == CCmode || mode == CCGCmode)
6998 if (mode == CCNOmode || mode == CCGOCmode)
7000 else if (mode == CCmode || mode == CCGCmode)
7009 suffix = fp ? "nb" : "ae";
7012 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7022 suffix = fp ? "u" : "p";
7025 suffix = fp ? "nu" : "np";
7030 fputs (suffix, file);
7034 print_reg (x, code, file)
7039 if (REGNO (x) == ARG_POINTER_REGNUM
7040 || REGNO (x) == FRAME_POINTER_REGNUM
7041 || REGNO (x) == FLAGS_REG
7042 || REGNO (x) == FPSR_REG)
7045 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7048 if (code == 'w' || MMX_REG_P (x))
7050 else if (code == 'b')
7052 else if (code == 'k')
7054 else if (code == 'q')
7056 else if (code == 'y')
7058 else if (code == 'h')
7061 code = GET_MODE_SIZE (GET_MODE (x));
7063 /* Irritatingly, AMD extended registers use different naming convention
7064 from the normal registers. */
7065 if (REX_INT_REG_P (x))
7072 error ("extended registers have no high halves");
7075 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7078 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7081 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7084 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7087 error ("unsupported operand size for extended register");
7095 if (STACK_TOP_P (x))
7097 fputs ("st(0)", file);
7104 if (! ANY_FP_REG_P (x))
7105 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7109 fputs (hi_reg_name[REGNO (x)], file);
7112 fputs (qi_reg_name[REGNO (x)], file);
7115 fputs (qi_high_reg_name[REGNO (x)], file);
7122 /* Locate some local-dynamic symbol still in use by this function
7123 so that we can print its name in some tls_local_dynamic_base
7127 get_some_local_dynamic_name ()
7131 if (cfun->machine->some_ld_name)
7132 return cfun->machine->some_ld_name;
7134 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7136 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7137 return cfun->machine->some_ld_name;
7143 get_some_local_dynamic_name_1 (px, data)
7145 void *data ATTRIBUTE_UNUSED;
7149 if (GET_CODE (x) == SYMBOL_REF
7150 && local_dynamic_symbolic_operand (x, Pmode))
7152 cfun->machine->some_ld_name = XSTR (x, 0);
7160 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7161 C -- print opcode suffix for set/cmov insn.
7162 c -- like C, but print reversed condition
7163 F,f -- likewise, but for floating-point.
7164 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
7166 R -- print the prefix for register names.
7167 z -- print the opcode suffix for the size of the current operand.
7168 * -- print a star (in certain assembler syntax)
7169 A -- print an absolute memory reference.
7170 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7171 s -- print a shift double count, followed by the assemblers argument
7173 b -- print the QImode name of the register for the indicated operand.
7174 %b0 would print %al if operands[0] is reg 0.
7175 w -- likewise, print the HImode name of the register.
7176 k -- likewise, print the SImode name of the register.
7177 q -- likewise, print the DImode name of the register.
7178 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7179 y -- print "st(0)" instead of "st" as a register.
7180 D -- print condition for SSE cmp instruction.
7181 P -- if PIC, print an @PLT suffix.
7182 X -- don't print any sort of PIC '@' suffix for a symbol.
7183 & -- print some in-use local-dynamic symbol name.
7187 print_operand (file, x, code)
7197 if (ASSEMBLER_DIALECT == ASM_ATT)
7202 assemble_name (file, get_some_local_dynamic_name ());
7206 if (ASSEMBLER_DIALECT == ASM_ATT)
7208 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7210 /* Intel syntax. For absolute addresses, registers should not
7211 be surrounded by braces. */
7212 if (GET_CODE (x) != REG)
7215 PRINT_OPERAND (file, x, 0);
7223 PRINT_OPERAND (file, x, 0);
7228 if (ASSEMBLER_DIALECT == ASM_ATT)
7233 if (ASSEMBLER_DIALECT == ASM_ATT)
7238 if (ASSEMBLER_DIALECT == ASM_ATT)
7243 if (ASSEMBLER_DIALECT == ASM_ATT)
7248 if (ASSEMBLER_DIALECT == ASM_ATT)
7253 if (ASSEMBLER_DIALECT == ASM_ATT)
7258 /* 387 opcodes don't get size suffixes if the operands are
7260 if (STACK_REG_P (x))
7263 /* Likewise if using Intel opcodes. */
7264 if (ASSEMBLER_DIALECT == ASM_INTEL)
7267 /* This is the size of op from size of operand. */
7268 switch (GET_MODE_SIZE (GET_MODE (x)))
7271 #ifdef HAVE_GAS_FILDS_FISTS
7277 if (GET_MODE (x) == SFmode)
7292 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7294 #ifdef GAS_MNEMONICS
7320 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7322 PRINT_OPERAND (file, x, 0);
7328 /* Little bit of braindamage here. The SSE compare instructions
7329 does use completely different names for the comparisons that the
7330 fp conditional moves. */
7331 switch (GET_CODE (x))
7346 fputs ("unord", file);
7350 fputs ("neq", file);
7354 fputs ("nlt", file);
7358 fputs ("nle", file);
7361 fputs ("ord", file);
7369 #ifdef CMOV_SUN_AS_SYNTAX
7370 if (ASSEMBLER_DIALECT == ASM_ATT)
7372 switch (GET_MODE (x))
7374 case HImode: putc ('w', file); break;
7376 case SFmode: putc ('l', file); break;
7378 case DFmode: putc ('q', file); break;
7386 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7389 #ifdef CMOV_SUN_AS_SYNTAX
7390 if (ASSEMBLER_DIALECT == ASM_ATT)
7393 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7396 /* Like above, but reverse condition */
7398 /* Check to see if argument to %c is really a constant
7399 and not a condition code which needs to be reversed. */
7400 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7402 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7405 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7408 #ifdef CMOV_SUN_AS_SYNTAX
7409 if (ASSEMBLER_DIALECT == ASM_ATT)
7412 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7418 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7421 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7424 int pred_val = INTVAL (XEXP (x, 0));
7426 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7427 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7429 int taken = pred_val > REG_BR_PROB_BASE / 2;
7430 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7432 /* Emit hints only in the case default branch prediction
7433 heuristics would fail. */
7434 if (taken != cputaken)
7436 /* We use 3e (DS) prefix for taken branches and
7437 2e (CS) prefix for not taken branches. */
7439 fputs ("ds ; ", file);
7441 fputs ("cs ; ", file);
7448 output_operand_lossage ("invalid operand code `%c'", code);
7452 if (GET_CODE (x) == REG)
7454 PRINT_REG (x, code, file);
7457 else if (GET_CODE (x) == MEM)
7459 /* No `byte ptr' prefix for call instructions. */
7460 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7463 switch (GET_MODE_SIZE (GET_MODE (x)))
7465 case 1: size = "BYTE"; break;
7466 case 2: size = "WORD"; break;
7467 case 4: size = "DWORD"; break;
7468 case 8: size = "QWORD"; break;
7469 case 12: size = "XWORD"; break;
7470 case 16: size = "XMMWORD"; break;
7475 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7478 else if (code == 'w')
7480 else if (code == 'k')
7484 fputs (" PTR ", file);
7488 if (flag_pic && CONSTANT_ADDRESS_P (x))
7489 output_pic_addr_const (file, x, code);
7490 /* Avoid (%rip) for call operands. */
7491 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7492 && GET_CODE (x) != CONST_INT)
7493 output_addr_const (file, x);
7494 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7495 output_operand_lossage ("invalid constraints for operand");
7500 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7505 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7506 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7508 if (ASSEMBLER_DIALECT == ASM_ATT)
7510 fprintf (file, "0x%lx", l);
7513 /* These float cases don't actually occur as immediate operands. */
7514 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7518 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7519 fprintf (file, "%s", dstr);
7522 else if (GET_CODE (x) == CONST_DOUBLE
7523 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7527 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7528 fprintf (file, "%s", dstr);
7535 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7537 if (ASSEMBLER_DIALECT == ASM_ATT)
7540 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7541 || GET_CODE (x) == LABEL_REF)
7543 if (ASSEMBLER_DIALECT == ASM_ATT)
7546 fputs ("OFFSET FLAT:", file);
7549 if (GET_CODE (x) == CONST_INT)
7550 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7552 output_pic_addr_const (file, x, code);
7554 output_addr_const (file, x);
7558 /* Print a memory operand whose address is ADDR. */
7561 print_operand_address (file, addr)
7565 struct ix86_address parts;
7566 rtx base, index, disp;
7569 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7571 if (ASSEMBLER_DIALECT == ASM_INTEL)
7572 fputs ("DWORD PTR ", file);
7573 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7576 fputs ("fs:0", file);
7578 fputs ("gs:0", file);
7582 if (! ix86_decompose_address (addr, &parts))
7586 index = parts.index;
7588 scale = parts.scale;
7590 if (!base && !index)
7592 /* Displacement only requires special attention. */
7594 if (GET_CODE (disp) == CONST_INT)
7596 if (ASSEMBLER_DIALECT == ASM_INTEL)
7598 if (USER_LABEL_PREFIX[0] == 0)
7600 fputs ("ds:", file);
7602 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7605 output_pic_addr_const (file, addr, 0);
7607 output_addr_const (file, addr);
7609 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7611 && ((GET_CODE (addr) == SYMBOL_REF
7612 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7613 || GET_CODE (addr) == LABEL_REF
7614 || (GET_CODE (addr) == CONST
7615 && GET_CODE (XEXP (addr, 0)) == PLUS
7616 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7617 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7618 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7619 fputs ("(%rip)", file);
7623 if (ASSEMBLER_DIALECT == ASM_ATT)
7628 output_pic_addr_const (file, disp, 0);
7629 else if (GET_CODE (disp) == LABEL_REF)
7630 output_asm_label (disp);
7632 output_addr_const (file, disp);
7637 PRINT_REG (base, 0, file);
7641 PRINT_REG (index, 0, file);
7643 fprintf (file, ",%d", scale);
7649 rtx offset = NULL_RTX;
7653 /* Pull out the offset of a symbol; print any symbol itself. */
7654 if (GET_CODE (disp) == CONST
7655 && GET_CODE (XEXP (disp, 0)) == PLUS
7656 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7658 offset = XEXP (XEXP (disp, 0), 1);
7659 disp = gen_rtx_CONST (VOIDmode,
7660 XEXP (XEXP (disp, 0), 0));
7664 output_pic_addr_const (file, disp, 0);
7665 else if (GET_CODE (disp) == LABEL_REF)
7666 output_asm_label (disp);
7667 else if (GET_CODE (disp) == CONST_INT)
7670 output_addr_const (file, disp);
7676 PRINT_REG (base, 0, file);
7679 if (INTVAL (offset) >= 0)
7681 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7692 PRINT_REG (index, 0, file);
7694 fprintf (file, "*%d", scale);
7702 output_addr_const_extra (file, x)
7708 if (GET_CODE (x) != UNSPEC)
7711 op = XVECEXP (x, 0, 0);
7712 switch (XINT (x, 1))
7714 case UNSPEC_GOTTPOFF:
7715 output_addr_const (file, op);
7716 /* FIXME: This might be @TPOFF in Sun ld. */
7717 fputs ("@GOTTPOFF", file);
7720 output_addr_const (file, op);
7721 fputs ("@TPOFF", file);
7724 output_addr_const (file, op);
7726 fputs ("@TPOFF", file);
7728 fputs ("@NTPOFF", file);
7731 output_addr_const (file, op);
7732 fputs ("@DTPOFF", file);
7734 case UNSPEC_GOTNTPOFF:
7735 output_addr_const (file, op);
7737 fputs ("@GOTTPOFF(%rip)", file);
7739 fputs ("@GOTNTPOFF", file);
7741 case UNSPEC_INDNTPOFF:
7742 output_addr_const (file, op);
7743 fputs ("@INDNTPOFF", file);
7753 /* Split one or more DImode RTL references into pairs of SImode
7754 references. The RTL can be REG, offsettable MEM, integer constant, or
7755 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7756 split and "num" is its length. lo_half and hi_half are output arrays
7757 that parallel "operands". */
7760 split_di (operands, num, lo_half, hi_half)
7763 rtx lo_half[], hi_half[];
7767 rtx op = operands[num];
7769 /* simplify_subreg refuse to split volatile memory addresses,
7770 but we still have to handle it. */
7771 if (GET_CODE (op) == MEM)
7773 lo_half[num] = adjust_address (op, SImode, 0);
7774 hi_half[num] = adjust_address (op, SImode, 4);
7778 lo_half[num] = simplify_gen_subreg (SImode, op,
7779 GET_MODE (op) == VOIDmode
7780 ? DImode : GET_MODE (op), 0);
7781 hi_half[num] = simplify_gen_subreg (SImode, op,
7782 GET_MODE (op) == VOIDmode
7783 ? DImode : GET_MODE (op), 4);
7787 /* Split one or more TImode RTL references into pairs of SImode
7788 references. The RTL can be REG, offsettable MEM, integer constant, or
7789 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7790 split and "num" is its length. lo_half and hi_half are output arrays
7791 that parallel "operands". */
7794 split_ti (operands, num, lo_half, hi_half)
7797 rtx lo_half[], hi_half[];
7801 rtx op = operands[num];
7803 /* simplify_subreg refuse to split volatile memory addresses, but we
7804 still have to handle it. */
7805 if (GET_CODE (op) == MEM)
7807 lo_half[num] = adjust_address (op, DImode, 0);
7808 hi_half[num] = adjust_address (op, DImode, 8);
7812 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7813 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7818 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7819 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7820 is the expression of the binary operation. The output may either be
7821 emitted here, or returned to the caller, like all output_* functions.
7823 There is no guarantee that the operands are the same mode, as they
7824 might be within FLOAT or FLOAT_EXTEND expressions. */
7826 #ifndef SYSV386_COMPAT
7827 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7828 wants to fix the assemblers because that causes incompatibility
7829 with gcc. No-one wants to fix gcc because that causes
7830 incompatibility with assemblers... You can use the option of
7831 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7832 #define SYSV386_COMPAT 1
7836 output_387_binary_op (insn, operands)
7840 static char buf[30];
7843 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7845 #ifdef ENABLE_CHECKING
7846 /* Even if we do not want to check the inputs, this documents input
7847 constraints. Which helps in understanding the following code. */
7848 if (STACK_REG_P (operands[0])
7849 && ((REG_P (operands[1])
7850 && REGNO (operands[0]) == REGNO (operands[1])
7851 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7852 || (REG_P (operands[2])
7853 && REGNO (operands[0]) == REGNO (operands[2])
7854 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7855 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7861 switch (GET_CODE (operands[3]))
7864 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7865 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7873 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7874 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7882 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7883 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7891 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7892 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7906 if (GET_MODE (operands[0]) == SFmode)
7907 strcat (buf, "ss\t{%2, %0|%0, %2}");
7909 strcat (buf, "sd\t{%2, %0|%0, %2}");
7914 switch (GET_CODE (operands[3]))
7918 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7920 rtx temp = operands[2];
7921 operands[2] = operands[1];
7925 /* know operands[0] == operands[1]. */
7927 if (GET_CODE (operands[2]) == MEM)
7933 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7935 if (STACK_TOP_P (operands[0]))
7936 /* How is it that we are storing to a dead operand[2]?
7937 Well, presumably operands[1] is dead too. We can't
7938 store the result to st(0) as st(0) gets popped on this
7939 instruction. Instead store to operands[2] (which I
7940 think has to be st(1)). st(1) will be popped later.
7941 gcc <= 2.8.1 didn't have this check and generated
7942 assembly code that the Unixware assembler rejected. */
7943 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7945 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7949 if (STACK_TOP_P (operands[0]))
7950 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7952 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7957 if (GET_CODE (operands[1]) == MEM)
7963 if (GET_CODE (operands[2]) == MEM)
7969 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7972 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7973 derived assemblers, confusingly reverse the direction of
7974 the operation for fsub{r} and fdiv{r} when the
7975 destination register is not st(0). The Intel assembler
7976 doesn't have this brain damage. Read !SYSV386_COMPAT to
7977 figure out what the hardware really does. */
7978 if (STACK_TOP_P (operands[0]))
7979 p = "{p\t%0, %2|rp\t%2, %0}";
7981 p = "{rp\t%2, %0|p\t%0, %2}";
7983 if (STACK_TOP_P (operands[0]))
7984 /* As above for fmul/fadd, we can't store to st(0). */
7985 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7987 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7992 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7995 if (STACK_TOP_P (operands[0]))
7996 p = "{rp\t%0, %1|p\t%1, %0}";
7998 p = "{p\t%1, %0|rp\t%0, %1}";
8000 if (STACK_TOP_P (operands[0]))
8001 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8003 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8008 if (STACK_TOP_P (operands[0]))
8010 if (STACK_TOP_P (operands[1]))
8011 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8013 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8016 else if (STACK_TOP_P (operands[1]))
8019 p = "{\t%1, %0|r\t%0, %1}";
8021 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8027 p = "{r\t%2, %0|\t%0, %2}";
8029 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8042 /* Output code to initialize control word copies used by
8043 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8044 is set to control word rounding downwards. */
8046 emit_i387_cw_initialization (normal, round_down)
8047 rtx normal, round_down;
8049 rtx reg = gen_reg_rtx (HImode);
8051 emit_insn (gen_x86_fnstcw_1 (normal));
8052 emit_move_insn (reg, normal);
8053 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8055 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8057 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8058 emit_move_insn (round_down, reg);
8061 /* Output code for INSN to convert a float to a signed int. OPERANDS
8062 are the insn operands. The output may be [HSD]Imode and the input
8063 operand may be [SDX]Fmode. */
8066 output_fix_trunc (insn, operands)
8070 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8071 int dimode_p = GET_MODE (operands[0]) == DImode;
8073 /* Jump through a hoop or two for DImode, since the hardware has no
8074 non-popping instruction. We used to do this a different way, but
8075 that was somewhat fragile and broke with post-reload splitters. */
8076 if (dimode_p && !stack_top_dies)
8077 output_asm_insn ("fld\t%y1", operands);
8079 if (!STACK_TOP_P (operands[1]))
8082 if (GET_CODE (operands[0]) != MEM)
8085 output_asm_insn ("fldcw\t%3", operands);
8086 if (stack_top_dies || dimode_p)
8087 output_asm_insn ("fistp%z0\t%0", operands);
8089 output_asm_insn ("fist%z0\t%0", operands);
8090 output_asm_insn ("fldcw\t%2", operands);
8095 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8096 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8097 when fucom should be used. */
8100 output_fp_compare (insn, operands, eflags_p, unordered_p)
8103 int eflags_p, unordered_p;
8106 rtx cmp_op0 = operands[0];
8107 rtx cmp_op1 = operands[1];
8108 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8113 cmp_op1 = operands[2];
8117 if (GET_MODE (operands[0]) == SFmode)
8119 return "ucomiss\t{%1, %0|%0, %1}";
8121 return "comiss\t{%1, %0|%0, %1}";
8124 return "ucomisd\t{%1, %0|%0, %1}";
8126 return "comisd\t{%1, %0|%0, %1}";
8129 if (! STACK_TOP_P (cmp_op0))
8132 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8134 if (STACK_REG_P (cmp_op1)
8136 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8137 && REGNO (cmp_op1) != FIRST_STACK_REG)
8139 /* If both the top of the 387 stack dies, and the other operand
8140 is also a stack register that dies, then this must be a
8141 `fcompp' float compare */
8145 /* There is no double popping fcomi variant. Fortunately,
8146 eflags is immune from the fstp's cc clobbering. */
8148 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8150 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8158 return "fucompp\n\tfnstsw\t%0";
8160 return "fcompp\n\tfnstsw\t%0";
8173 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8175 static const char * const alt[24] =
8187 "fcomi\t{%y1, %0|%0, %y1}",
8188 "fcomip\t{%y1, %0|%0, %y1}",
8189 "fucomi\t{%y1, %0|%0, %y1}",
8190 "fucomip\t{%y1, %0|%0, %y1}",
8197 "fcom%z2\t%y2\n\tfnstsw\t%0",
8198 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8199 "fucom%z2\t%y2\n\tfnstsw\t%0",
8200 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8202 "ficom%z2\t%y2\n\tfnstsw\t%0",
8203 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8211 mask = eflags_p << 3;
8212 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8213 mask |= unordered_p << 1;
8214 mask |= stack_top_dies;
8227 ix86_output_addr_vec_elt (file, value)
8231 const char *directive = ASM_LONG;
8236 directive = ASM_QUAD;
8242 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8246 ix86_output_addr_diff_elt (file, value, rel)
8251 fprintf (file, "%s%s%d-%s%d\n",
8252 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8253 else if (HAVE_AS_GOTOFF_IN_DATA)
8254 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8256 else if (TARGET_MACHO)
8257 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8258 machopic_function_base_name () + 1);
8261 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8262 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8265 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8269 ix86_expand_clear (dest)
8274 /* We play register width games, which are only valid after reload. */
8275 if (!reload_completed)
8278 /* Avoid HImode and its attendant prefix byte. */
8279 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8280 dest = gen_rtx_REG (SImode, REGNO (dest));
8282 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8284 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8285 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8287 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8288 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8294 /* X is an unchanging MEM. If it is a constant pool reference, return
8295 the constant pool rtx, else NULL. */
8298 maybe_get_pool_constant (x)
8301 x = ix86_delegitimize_address (XEXP (x, 0));
8303 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8304 return get_pool_constant (x);
8310 ix86_expand_move (mode, operands)
8311 enum machine_mode mode;
8314 int strict = (reload_in_progress || reload_completed);
8315 rtx insn, op0, op1, tmp;
8320 if (tls_symbolic_operand (op1, Pmode))
8322 op1 = legitimize_address (op1, op1, VOIDmode);
8323 if (GET_CODE (op0) == MEM)
8325 tmp = gen_reg_rtx (mode);
8326 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8330 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8335 rtx temp = ((reload_in_progress
8336 || ((op0 && GET_CODE (op0) == REG)
8338 ? op0 : gen_reg_rtx (Pmode));
8339 op1 = machopic_indirect_data_reference (op1, temp);
8340 op1 = machopic_legitimize_pic_address (op1, mode,
8341 temp == op1 ? 0 : temp);
8345 if (MACHOPIC_INDIRECT)
8346 op1 = machopic_indirect_data_reference (op1, 0);
8350 insn = gen_rtx_SET (VOIDmode, op0, op1);
8354 #endif /* TARGET_MACHO */
8355 if (GET_CODE (op0) == MEM)
8356 op1 = force_reg (Pmode, op1);
8360 if (GET_CODE (temp) != REG)
8361 temp = gen_reg_rtx (Pmode);
8362 temp = legitimize_pic_address (op1, temp);
8370 if (GET_CODE (op0) == MEM
8371 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8372 || !push_operand (op0, mode))
8373 && GET_CODE (op1) == MEM)
8374 op1 = force_reg (mode, op1);
8376 if (push_operand (op0, mode)
8377 && ! general_no_elim_operand (op1, mode))
8378 op1 = copy_to_mode_reg (mode, op1);
8380 /* Force large constants in 64bit compilation into register
8381 to get them CSEed. */
8382 if (TARGET_64BIT && mode == DImode
8383 && immediate_operand (op1, mode)
8384 && !x86_64_zero_extended_value (op1)
8385 && !register_operand (op0, mode)
8386 && optimize && !reload_completed && !reload_in_progress)
8387 op1 = copy_to_mode_reg (mode, op1);
8389 if (FLOAT_MODE_P (mode))
8391 /* If we are loading a floating point constant to a register,
8392 force the value to memory now, since we'll get better code
8393 out the back end. */
8397 else if (GET_CODE (op1) == CONST_DOUBLE)
8399 op1 = validize_mem (force_const_mem (mode, op1));
8400 if (!register_operand (op0, mode))
8402 rtx temp = gen_reg_rtx (mode);
8403 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8404 emit_move_insn (op0, temp);
8411 insn = gen_rtx_SET (VOIDmode, op0, op1);
8417 ix86_expand_vector_move (mode, operands)
8418 enum machine_mode mode;
8421 /* Force constants other than zero into memory. We do not know how
8422 the instructions used to build constants modify the upper 64 bits
8423 of the register, once we have that information we may be able
8424 to handle some of them more efficiently. */
8425 if ((reload_in_progress | reload_completed) == 0
8426 && register_operand (operands[0], mode)
8427 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8428 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8430 /* Make operand1 a register if it isn't already. */
8432 && !register_operand (operands[0], mode)
8433 && !register_operand (operands[1], mode))
8435 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8436 emit_move_insn (operands[0], temp);
8440 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8443 /* Attempt to expand a binary operator. Make the expansion closer to the
8444 actual machine, then just general_operand, which will allow 3 separate
8445 memory references (one output, two input) in a single insn. */
8448 ix86_expand_binary_operator (code, mode, operands)
8450 enum machine_mode mode;
8453 int matching_memory;
8454 rtx src1, src2, dst, op, clob;
8460 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8461 if (GET_RTX_CLASS (code) == 'c'
8462 && (rtx_equal_p (dst, src2)
8463 || immediate_operand (src1, mode)))
8470 /* If the destination is memory, and we do not have matching source
8471 operands, do things in registers. */
8472 matching_memory = 0;
8473 if (GET_CODE (dst) == MEM)
8475 if (rtx_equal_p (dst, src1))
8476 matching_memory = 1;
8477 else if (GET_RTX_CLASS (code) == 'c'
8478 && rtx_equal_p (dst, src2))
8479 matching_memory = 2;
8481 dst = gen_reg_rtx (mode);
8484 /* Both source operands cannot be in memory. */
8485 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8487 if (matching_memory != 2)
8488 src2 = force_reg (mode, src2);
8490 src1 = force_reg (mode, src1);
8493 /* If the operation is not commutable, source 1 cannot be a constant
8494 or non-matching memory. */
8495 if ((CONSTANT_P (src1)
8496 || (!matching_memory && GET_CODE (src1) == MEM))
8497 && GET_RTX_CLASS (code) != 'c')
8498 src1 = force_reg (mode, src1);
8500 /* If optimizing, copy to regs to improve CSE */
8501 if (optimize && ! no_new_pseudos)
8503 if (GET_CODE (dst) == MEM)
8504 dst = gen_reg_rtx (mode);
8505 if (GET_CODE (src1) == MEM)
8506 src1 = force_reg (mode, src1);
8507 if (GET_CODE (src2) == MEM)
8508 src2 = force_reg (mode, src2);
8511 /* Emit the instruction. */
8513 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8514 if (reload_in_progress)
8516 /* Reload doesn't know about the flags register, and doesn't know that
8517 it doesn't want to clobber it. We can only do this with PLUS. */
8524 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8525 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8528 /* Fix up the destination if needed. */
8529 if (dst != operands[0])
8530 emit_move_insn (operands[0], dst);
8533 /* Return TRUE or FALSE depending on whether the binary operator meets the
8534 appropriate constraints. */
8537 ix86_binary_operator_ok (code, mode, operands)
8539 enum machine_mode mode ATTRIBUTE_UNUSED;
8542 /* Both source operands cannot be in memory. */
8543 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8545 /* If the operation is not commutable, source 1 cannot be a constant. */
8546 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8548 /* If the destination is memory, we must have a matching source operand. */
8549 if (GET_CODE (operands[0]) == MEM
8550 && ! (rtx_equal_p (operands[0], operands[1])
8551 || (GET_RTX_CLASS (code) == 'c'
8552 && rtx_equal_p (operands[0], operands[2]))))
8554 /* If the operation is not commutable and the source 1 is memory, we must
8555 have a matching destination. */
8556 if (GET_CODE (operands[1]) == MEM
8557 && GET_RTX_CLASS (code) != 'c'
8558 && ! rtx_equal_p (operands[0], operands[1]))
8563 /* Attempt to expand a unary operator. Make the expansion closer to the
8564 actual machine, then just general_operand, which will allow 2 separate
8565 memory references (one output, one input) in a single insn. */
8568 ix86_expand_unary_operator (code, mode, operands)
8570 enum machine_mode mode;
8573 int matching_memory;
8574 rtx src, dst, op, clob;
8579 /* If the destination is memory, and we do not have matching source
8580 operands, do things in registers. */
8581 matching_memory = 0;
8582 if (GET_CODE (dst) == MEM)
8584 if (rtx_equal_p (dst, src))
8585 matching_memory = 1;
8587 dst = gen_reg_rtx (mode);
8590 /* When source operand is memory, destination must match. */
8591 if (!matching_memory && GET_CODE (src) == MEM)
8592 src = force_reg (mode, src);
8594 /* If optimizing, copy to regs to improve CSE */
8595 if (optimize && ! no_new_pseudos)
8597 if (GET_CODE (dst) == MEM)
8598 dst = gen_reg_rtx (mode);
8599 if (GET_CODE (src) == MEM)
8600 src = force_reg (mode, src);
8603 /* Emit the instruction. */
8605 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8606 if (reload_in_progress || code == NOT)
8608 /* Reload doesn't know about the flags register, and doesn't know that
8609 it doesn't want to clobber it. */
8616 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8617 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8620 /* Fix up the destination if needed. */
8621 if (dst != operands[0])
8622 emit_move_insn (operands[0], dst);
8625 /* Return TRUE or FALSE depending on whether the unary operator meets the
8626 appropriate constraints. */
8629 ix86_unary_operator_ok (code, mode, operands)
8630 enum rtx_code code ATTRIBUTE_UNUSED;
8631 enum machine_mode mode ATTRIBUTE_UNUSED;
8632 rtx operands[2] ATTRIBUTE_UNUSED;
8634 /* If one of operands is memory, source and destination must match. */
8635 if ((GET_CODE (operands[0]) == MEM
8636 || GET_CODE (operands[1]) == MEM)
8637 && ! rtx_equal_p (operands[0], operands[1]))
8642 /* Return TRUE or FALSE depending on whether the first SET in INSN
8643 has source and destination with matching CC modes, and that the
8644 CC mode is at least as constrained as REQ_MODE. */
8647 ix86_match_ccmode (insn, req_mode)
8649 enum machine_mode req_mode;
8652 enum machine_mode set_mode;
8654 set = PATTERN (insn);
8655 if (GET_CODE (set) == PARALLEL)
8656 set = XVECEXP (set, 0, 0);
8657 if (GET_CODE (set) != SET)
8659 if (GET_CODE (SET_SRC (set)) != COMPARE)
8662 set_mode = GET_MODE (SET_DEST (set));
8666 if (req_mode != CCNOmode
8667 && (req_mode != CCmode
8668 || XEXP (SET_SRC (set), 1) != const0_rtx))
8672 if (req_mode == CCGCmode)
8676 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8680 if (req_mode == CCZmode)
8690 return (GET_MODE (SET_SRC (set)) == set_mode);
8693 /* Generate insn patterns to do an integer compare of OPERANDS. */
8696 ix86_expand_int_compare (code, op0, op1)
8700 enum machine_mode cmpmode;
8703 cmpmode = SELECT_CC_MODE (code, op0, op1);
8704 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8706 /* This is very simple, but making the interface the same as in the
8707 FP case makes the rest of the code easier. */
8708 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8709 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8711 /* Return the test that should be put into the flags user, i.e.
8712 the bcc, scc, or cmov instruction. */
8713 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8716 /* Figure out whether to use ordered or unordered fp comparisons.
8717 Return the appropriate mode to use. */
8720 ix86_fp_compare_mode (code)
8721 enum rtx_code code ATTRIBUTE_UNUSED;
8723 /* ??? In order to make all comparisons reversible, we do all comparisons
8724 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8725 all forms trapping and nontrapping comparisons, we can make inequality
8726 comparisons trapping again, since it results in better code when using
8727 FCOM based compares. */
8728 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8732 ix86_cc_mode (code, op0, op1)
8736 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8737 return ix86_fp_compare_mode (code);
8740 /* Only zero flag is needed. */
8742 case NE: /* ZF!=0 */
8744 /* Codes needing carry flag. */
8745 case GEU: /* CF=0 */
8746 case GTU: /* CF=0 & ZF=0 */
8747 case LTU: /* CF=1 */
8748 case LEU: /* CF=1 | ZF=1 */
8750 /* Codes possibly doable only with sign flag when
8751 comparing against zero. */
8752 case GE: /* SF=OF or SF=0 */
8753 case LT: /* SF<>OF or SF=1 */
8754 if (op1 == const0_rtx)
8757 /* For other cases Carry flag is not required. */
8759 /* Codes doable only with sign flag when comparing
8760 against zero, but we miss jump instruction for it
8761 so we need to use relational tests against overflow
8762 that thus needs to be zero. */
8763 case GT: /* ZF=0 & SF=OF */
8764 case LE: /* ZF=1 | SF<>OF */
8765 if (op1 == const0_rtx)
8769 /* strcmp pattern do (use flags) and combine may ask us for proper
8778 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8781 ix86_use_fcomi_compare (code)
8782 enum rtx_code code ATTRIBUTE_UNUSED;
8784 enum rtx_code swapped_code = swap_condition (code);
8785 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8786 || (ix86_fp_comparison_cost (swapped_code)
8787 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8790 /* Swap, force into registers, or otherwise massage the two operands
8791 to a fp comparison. The operands are updated in place; the new
8792 comparison code is returned. */
8794 static enum rtx_code
8795 ix86_prepare_fp_compare_args (code, pop0, pop1)
8799 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8800 rtx op0 = *pop0, op1 = *pop1;
8801 enum machine_mode op_mode = GET_MODE (op0);
8802 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8804 /* All of the unordered compare instructions only work on registers.
8805 The same is true of the XFmode compare instructions. The same is
8806 true of the fcomi compare instructions. */
8809 && (fpcmp_mode == CCFPUmode
8810 || op_mode == XFmode
8811 || op_mode == TFmode
8812 || ix86_use_fcomi_compare (code)))
8814 op0 = force_reg (op_mode, op0);
8815 op1 = force_reg (op_mode, op1);
8819 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8820 things around if they appear profitable, otherwise force op0
8823 if (standard_80387_constant_p (op0) == 0
8824 || (GET_CODE (op0) == MEM
8825 && ! (standard_80387_constant_p (op1) == 0
8826 || GET_CODE (op1) == MEM)))
8829 tmp = op0, op0 = op1, op1 = tmp;
8830 code = swap_condition (code);
8833 if (GET_CODE (op0) != REG)
8834 op0 = force_reg (op_mode, op0);
8836 if (CONSTANT_P (op1))
8838 if (standard_80387_constant_p (op1))
8839 op1 = force_reg (op_mode, op1);
8841 op1 = validize_mem (force_const_mem (op_mode, op1));
8845 /* Try to rearrange the comparison to make it cheaper. */
8846 if (ix86_fp_comparison_cost (code)
8847 > ix86_fp_comparison_cost (swap_condition (code))
8848 && (GET_CODE (op1) == REG || !no_new_pseudos))
8851 tmp = op0, op0 = op1, op1 = tmp;
8852 code = swap_condition (code);
8853 if (GET_CODE (op0) != REG)
8854 op0 = force_reg (op_mode, op0);
8862 /* Convert comparison codes we use to represent FP comparison to integer
8863 code that will result in proper branch. Return UNKNOWN if no such code
8865 static enum rtx_code
8866 ix86_fp_compare_code_to_integer (code)
8896 /* Split comparison code CODE into comparisons we can do using branch
8897 instructions. BYPASS_CODE is comparison code for branch that will
8898 branch around FIRST_CODE and SECOND_CODE. If some of branches
8899 is not required, set value to NIL.
8900 We never require more than two branches. */
8902 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8903 enum rtx_code code, *bypass_code, *first_code, *second_code;
8909 /* The fcomi comparison sets flags as follows:
8919 case GT: /* GTU - CF=0 & ZF=0 */
8920 case GE: /* GEU - CF=0 */
8921 case ORDERED: /* PF=0 */
8922 case UNORDERED: /* PF=1 */
8923 case UNEQ: /* EQ - ZF=1 */
8924 case UNLT: /* LTU - CF=1 */
8925 case UNLE: /* LEU - CF=1 | ZF=1 */
8926 case LTGT: /* EQ - ZF=0 */
8928 case LT: /* LTU - CF=1 - fails on unordered */
8930 *bypass_code = UNORDERED;
8932 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8934 *bypass_code = UNORDERED;
8936 case EQ: /* EQ - ZF=1 - fails on unordered */
8938 *bypass_code = UNORDERED;
8940 case NE: /* NE - ZF=0 - fails on unordered */
8942 *second_code = UNORDERED;
8944 case UNGE: /* GEU - CF=0 - fails on unordered */
8946 *second_code = UNORDERED;
8948 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8950 *second_code = UNORDERED;
8955 if (!TARGET_IEEE_FP)
8962 /* Return cost of comparison done fcom + arithmetics operations on AX.
8963 All following functions do use number of instructions as a cost metrics.
8964 In future this should be tweaked to compute bytes for optimize_size and
8965 take into account performance of various instructions on various CPUs. */
8967 ix86_fp_comparison_arithmetics_cost (code)
8970 if (!TARGET_IEEE_FP)
8972 /* The cost of code output by ix86_expand_fp_compare. */
9000 /* Return cost of comparison done using fcomi operation.
9001 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9003 ix86_fp_comparison_fcomi_cost (code)
9006 enum rtx_code bypass_code, first_code, second_code;
9007 /* Return arbitrarily high cost when instruction is not supported - this
9008 prevents gcc from using it. */
9011 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9012 return (bypass_code != NIL || second_code != NIL) + 2;
9015 /* Return cost of comparison done using sahf operation.
9016 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9018 ix86_fp_comparison_sahf_cost (code)
9021 enum rtx_code bypass_code, first_code, second_code;
9022 /* Return arbitrarily high cost when instruction is not preferred - this
9023 avoids gcc from using it. */
9024 if (!TARGET_USE_SAHF && !optimize_size)
9026 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9027 return (bypass_code != NIL || second_code != NIL) + 3;
9030 /* Compute cost of the comparison done using any method.
9031 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9033 ix86_fp_comparison_cost (code)
9036 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9039 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9040 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9042 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9043 if (min > sahf_cost)
9045 if (min > fcomi_cost)
9050 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9053 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
9055 rtx op0, op1, scratch;
9059 enum machine_mode fpcmp_mode, intcmp_mode;
9061 int cost = ix86_fp_comparison_cost (code);
9062 enum rtx_code bypass_code, first_code, second_code;
9064 fpcmp_mode = ix86_fp_compare_mode (code);
9065 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9068 *second_test = NULL_RTX;
9070 *bypass_test = NULL_RTX;
9072 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9074 /* Do fcomi/sahf based test when profitable. */
9075 if ((bypass_code == NIL || bypass_test)
9076 && (second_code == NIL || second_test)
9077 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9081 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9082 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9088 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9089 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9091 scratch = gen_reg_rtx (HImode);
9092 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9093 emit_insn (gen_x86_sahf_1 (scratch));
9096 /* The FP codes work out to act like unsigned. */
9097 intcmp_mode = fpcmp_mode;
9099 if (bypass_code != NIL)
9100 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9101 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9103 if (second_code != NIL)
9104 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9105 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9110 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9111 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9112 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9114 scratch = gen_reg_rtx (HImode);
9115 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9117 /* In the unordered case, we have to check C2 for NaN's, which
9118 doesn't happen to work out to anything nice combination-wise.
9119 So do some bit twiddling on the value we've got in AH to come
9120 up with an appropriate set of condition codes. */
9122 intcmp_mode = CCNOmode;
9127 if (code == GT || !TARGET_IEEE_FP)
9129 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9134 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9135 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9136 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9137 intcmp_mode = CCmode;
9143 if (code == LT && TARGET_IEEE_FP)
9145 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9146 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9147 intcmp_mode = CCmode;
9152 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9158 if (code == GE || !TARGET_IEEE_FP)
9160 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9165 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9166 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9173 if (code == LE && TARGET_IEEE_FP)
9175 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9176 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9177 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9178 intcmp_mode = CCmode;
9183 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9189 if (code == EQ && TARGET_IEEE_FP)
9191 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9192 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9193 intcmp_mode = CCmode;
9198 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9205 if (code == NE && TARGET_IEEE_FP)
9207 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9208 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9214 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9220 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9224 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9233 /* Return the test that should be put into the flags user, i.e.
9234 the bcc, scc, or cmov instruction. */
9235 return gen_rtx_fmt_ee (code, VOIDmode,
9236 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9241 ix86_expand_compare (code, second_test, bypass_test)
9243 rtx *second_test, *bypass_test;
9246 op0 = ix86_compare_op0;
9247 op1 = ix86_compare_op1;
9250 *second_test = NULL_RTX;
9252 *bypass_test = NULL_RTX;
9254 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9255 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9256 second_test, bypass_test);
9258 ret = ix86_expand_int_compare (code, op0, op1);
9263 /* Return true if the CODE will result in nontrivial jump sequence. */
9265 ix86_fp_jump_nontrivial_p (code)
9268 enum rtx_code bypass_code, first_code, second_code;
9271 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9272 return bypass_code != NIL || second_code != NIL;
9276 ix86_expand_branch (code, label)
9282 switch (GET_MODE (ix86_compare_op0))
9288 tmp = ix86_expand_compare (code, NULL, NULL);
9289 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9290 gen_rtx_LABEL_REF (VOIDmode, label),
9292 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9302 enum rtx_code bypass_code, first_code, second_code;
9304 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9307 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9309 /* Check whether we will use the natural sequence with one jump. If
9310 so, we can expand jump early. Otherwise delay expansion by
9311 creating compound insn to not confuse optimizers. */
9312 if (bypass_code == NIL && second_code == NIL
9315 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9316 gen_rtx_LABEL_REF (VOIDmode, label),
9321 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9322 ix86_compare_op0, ix86_compare_op1);
9323 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9324 gen_rtx_LABEL_REF (VOIDmode, label),
9326 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9328 use_fcomi = ix86_use_fcomi_compare (code);
9329 vec = rtvec_alloc (3 + !use_fcomi);
9330 RTVEC_ELT (vec, 0) = tmp;
9332 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9334 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9337 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9339 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9347 /* Expand DImode branch into multiple compare+branch. */
9349 rtx lo[2], hi[2], label2;
9350 enum rtx_code code1, code2, code3;
9352 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9354 tmp = ix86_compare_op0;
9355 ix86_compare_op0 = ix86_compare_op1;
9356 ix86_compare_op1 = tmp;
9357 code = swap_condition (code);
9359 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9360 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9362 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9363 avoid two branches. This costs one extra insn, so disable when
9364 optimizing for size. */
9366 if ((code == EQ || code == NE)
9368 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9373 if (hi[1] != const0_rtx)
9374 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9375 NULL_RTX, 0, OPTAB_WIDEN);
9378 if (lo[1] != const0_rtx)
9379 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9380 NULL_RTX, 0, OPTAB_WIDEN);
9382 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9383 NULL_RTX, 0, OPTAB_WIDEN);
9385 ix86_compare_op0 = tmp;
9386 ix86_compare_op1 = const0_rtx;
9387 ix86_expand_branch (code, label);
9391 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9392 op1 is a constant and the low word is zero, then we can just
9393 examine the high word. */
9395 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9398 case LT: case LTU: case GE: case GEU:
9399 ix86_compare_op0 = hi[0];
9400 ix86_compare_op1 = hi[1];
9401 ix86_expand_branch (code, label);
9407 /* Otherwise, we need two or three jumps. */
9409 label2 = gen_label_rtx ();
9412 code2 = swap_condition (code);
9413 code3 = unsigned_condition (code);
9417 case LT: case GT: case LTU: case GTU:
9420 case LE: code1 = LT; code2 = GT; break;
9421 case GE: code1 = GT; code2 = LT; break;
9422 case LEU: code1 = LTU; code2 = GTU; break;
9423 case GEU: code1 = GTU; code2 = LTU; break;
9425 case EQ: code1 = NIL; code2 = NE; break;
9426 case NE: code2 = NIL; break;
9434 * if (hi(a) < hi(b)) goto true;
9435 * if (hi(a) > hi(b)) goto false;
9436 * if (lo(a) < lo(b)) goto true;
9440 ix86_compare_op0 = hi[0];
9441 ix86_compare_op1 = hi[1];
9444 ix86_expand_branch (code1, label);
9446 ix86_expand_branch (code2, label2);
9448 ix86_compare_op0 = lo[0];
9449 ix86_compare_op1 = lo[1];
9450 ix86_expand_branch (code3, label);
9453 emit_label (label2);
9462 /* Split branch based on floating point condition. */
9464 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9466 rtx op1, op2, target1, target2, tmp;
9469 rtx label = NULL_RTX;
9471 int bypass_probability = -1, second_probability = -1, probability = -1;
9474 if (target2 != pc_rtx)
9477 code = reverse_condition_maybe_unordered (code);
9482 condition = ix86_expand_fp_compare (code, op1, op2,
9483 tmp, &second, &bypass);
9485 if (split_branch_probability >= 0)
9487 /* Distribute the probabilities across the jumps.
9488 Assume the BYPASS and SECOND to be always test
9490 probability = split_branch_probability;
9492 /* Value of 1 is low enough to make no need for probability
9493 to be updated. Later we may run some experiments and see
9494 if unordered values are more frequent in practice. */
9496 bypass_probability = 1;
9498 second_probability = 1;
9500 if (bypass != NULL_RTX)
9502 label = gen_label_rtx ();
9503 i = emit_jump_insn (gen_rtx_SET
9505 gen_rtx_IF_THEN_ELSE (VOIDmode,
9507 gen_rtx_LABEL_REF (VOIDmode,
9510 if (bypass_probability >= 0)
9512 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9513 GEN_INT (bypass_probability),
9516 i = emit_jump_insn (gen_rtx_SET
9518 gen_rtx_IF_THEN_ELSE (VOIDmode,
9519 condition, target1, target2)));
9520 if (probability >= 0)
9522 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9523 GEN_INT (probability),
9525 if (second != NULL_RTX)
9527 i = emit_jump_insn (gen_rtx_SET
9529 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9531 if (second_probability >= 0)
9533 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9534 GEN_INT (second_probability),
9537 if (label != NULL_RTX)
9542 ix86_expand_setcc (code, dest)
9546 rtx ret, tmp, tmpreg;
9547 rtx second_test, bypass_test;
9549 if (GET_MODE (ix86_compare_op0) == DImode
9551 return 0; /* FAIL */
9553 if (GET_MODE (dest) != QImode)
9556 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9557 PUT_MODE (ret, QImode);
9562 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9563 if (bypass_test || second_test)
9565 rtx test = second_test;
9567 rtx tmp2 = gen_reg_rtx (QImode);
9574 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9576 PUT_MODE (test, QImode);
9577 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9580 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9582 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9585 return 1; /* DONE */
9588 /* Expand comparison setting or clearing carry flag. Return true when successful
9589 and set pop for the operation. */
9591 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9595 enum machine_mode mode =
9596 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9598 /* Do not handle DImode compares that go trought special path. Also we can't
9599 deal with FP compares yet. This is possible to add. */
9600 if ((mode == DImode && !TARGET_64BIT))
9602 if (FLOAT_MODE_P (mode))
9604 rtx second_test = NULL, bypass_test = NULL;
9605 rtx compare_op, compare_seq;
9607 /* Shortcut: following common codes never translate into carry flag compares. */
9608 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9609 || code == ORDERED || code == UNORDERED)
9612 /* These comparisons require zero flag; swap operands so they won't. */
9613 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9619 code = swap_condition (code);
9622 /* Try to expand the comparsion and verify that we end up with carry flag
9623 based comparsion. This is fails to be true only when we decide to expand
9624 comparsion using arithmetic that is not too common scenario. */
9626 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9627 &second_test, &bypass_test);
9628 compare_seq = get_insns ();
9631 if (second_test || bypass_test)
9633 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9634 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9635 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9637 code = GET_CODE (compare_op);
9638 if (code != LTU && code != GEU)
9640 emit_insn (compare_seq);
9644 if (!INTEGRAL_MODE_P (mode))
9652 /* Convert a==0 into (unsigned)a<1. */
9655 if (op1 != const0_rtx)
9658 code = (code == EQ ? LTU : GEU);
9661 /* Convert a>b into b<a or a>=b-1. */
9664 if (GET_CODE (op1) == CONST_INT)
9666 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9667 /* Bail out on overflow. We still can swap operands but that
9668 would force loading of the constant into register. */
9669 if (op1 == const0_rtx
9670 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9672 code = (code == GTU ? GEU : LTU);
9679 code = (code == GTU ? LTU : GEU);
9683 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9686 if (mode == DImode || op1 != const0_rtx)
9688 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9689 code = (code == LT ? GEU : LTU);
9693 if (mode == DImode || op1 != constm1_rtx)
9695 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9696 code = (code == LE ? GEU : LTU);
9702 ix86_compare_op0 = op0;
9703 ix86_compare_op1 = op1;
9704 *pop = ix86_expand_compare (code, NULL, NULL);
9705 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9711 ix86_expand_int_movcc (operands)
9714 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9715 rtx compare_seq, compare_op;
9716 rtx second_test, bypass_test;
9717 enum machine_mode mode = GET_MODE (operands[0]);
9718 bool sign_bit_compare_p = false;;
9721 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9722 compare_seq = get_insns ();
9725 compare_code = GET_CODE (compare_op);
9727 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9728 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9729 sign_bit_compare_p = true;
9731 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9732 HImode insns, we'd be swallowed in word prefix ops. */
9734 if ((mode != HImode || TARGET_FAST_PREFIX)
9735 && (mode != DImode || TARGET_64BIT)
9736 && GET_CODE (operands[2]) == CONST_INT
9737 && GET_CODE (operands[3]) == CONST_INT)
9739 rtx out = operands[0];
9740 HOST_WIDE_INT ct = INTVAL (operands[2]);
9741 HOST_WIDE_INT cf = INTVAL (operands[3]);
9745 /* Sign bit compares are better done using shifts than we do by using
9747 if (sign_bit_compare_p
9748 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9749 ix86_compare_op1, &compare_op))
9751 /* Detect overlap between destination and compare sources. */
9754 if (!sign_bit_compare_p)
9758 compare_code = GET_CODE (compare_op);
9760 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9761 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9764 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9767 /* To simplify rest of code, restrict to the GEU case. */
9768 if (compare_code == LTU)
9770 HOST_WIDE_INT tmp = ct;
9773 compare_code = reverse_condition (compare_code);
9774 code = reverse_condition (code);
9779 PUT_CODE (compare_op,
9780 reverse_condition_maybe_unordered
9781 (GET_CODE (compare_op)));
9783 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9787 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9788 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9789 tmp = gen_reg_rtx (mode);
9792 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9794 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9798 if (code == GT || code == GE)
9799 code = reverse_condition (code);
9802 HOST_WIDE_INT tmp = ct;
9807 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9808 ix86_compare_op1, VOIDmode, 0, -1);
9821 tmp = expand_simple_binop (mode, PLUS,
9823 copy_rtx (tmp), 1, OPTAB_DIRECT);
9834 tmp = expand_simple_binop (mode, IOR,
9836 copy_rtx (tmp), 1, OPTAB_DIRECT);
9838 else if (diff == -1 && ct)
9848 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9850 tmp = expand_simple_binop (mode, PLUS,
9851 copy_rtx (tmp), GEN_INT (cf),
9852 copy_rtx (tmp), 1, OPTAB_DIRECT);
9860 * andl cf - ct, dest
9870 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9873 tmp = expand_simple_binop (mode, AND,
9875 gen_int_mode (cf - ct, mode),
9876 copy_rtx (tmp), 1, OPTAB_DIRECT);
9878 tmp = expand_simple_binop (mode, PLUS,
9879 copy_rtx (tmp), GEN_INT (ct),
9880 copy_rtx (tmp), 1, OPTAB_DIRECT);
9883 if (!rtx_equal_p (tmp, out))
9884 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9886 return 1; /* DONE */
9892 tmp = ct, ct = cf, cf = tmp;
9894 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9896 /* We may be reversing unordered compare to normal compare, that
9897 is not valid in general (we may convert non-trapping condition
9898 to trapping one), however on i386 we currently emit all
9899 comparisons unordered. */
9900 compare_code = reverse_condition_maybe_unordered (compare_code);
9901 code = reverse_condition_maybe_unordered (code);
9905 compare_code = reverse_condition (compare_code);
9906 code = reverse_condition (code);
9911 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9912 && GET_CODE (ix86_compare_op1) == CONST_INT)
9914 if (ix86_compare_op1 == const0_rtx
9915 && (code == LT || code == GE))
9916 compare_code = code;
9917 else if (ix86_compare_op1 == constm1_rtx)
9921 else if (code == GT)
9926 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9927 if (compare_code != NIL
9928 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9929 && (cf == -1 || ct == -1))
9931 /* If lea code below could be used, only optimize
9932 if it results in a 2 insn sequence. */
9934 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9935 || diff == 3 || diff == 5 || diff == 9)
9936 || (compare_code == LT && ct == -1)
9937 || (compare_code == GE && cf == -1))
9940 * notl op1 (if necessary)
9948 code = reverse_condition (code);
9951 out = emit_store_flag (out, code, ix86_compare_op0,
9952 ix86_compare_op1, VOIDmode, 0, -1);
9954 out = expand_simple_binop (mode, IOR,
9956 out, 1, OPTAB_DIRECT);
9957 if (out != operands[0])
9958 emit_move_insn (operands[0], out);
9960 return 1; /* DONE */
9965 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9966 || diff == 3 || diff == 5 || diff == 9)
9967 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9968 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9974 * lea cf(dest*(ct-cf)),dest
9978 * This also catches the degenerate setcc-only case.
9984 out = emit_store_flag (out, code, ix86_compare_op0,
9985 ix86_compare_op1, VOIDmode, 0, 1);
9988 /* On x86_64 the lea instruction operates on Pmode, so we need
9989 to get arithmetics done in proper mode to match. */
9991 tmp = copy_rtx (out);
9995 out1 = copy_rtx (out);
9996 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10000 tmp = gen_rtx_PLUS (mode, tmp, out1);
10006 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10009 if (!rtx_equal_p (tmp, out))
10012 out = force_operand (tmp, copy_rtx (out));
10014 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10016 if (!rtx_equal_p (out, operands[0]))
10017 emit_move_insn (operands[0], copy_rtx (out));
10019 return 1; /* DONE */
10023 * General case: Jumpful:
10024 * xorl dest,dest cmpl op1, op2
10025 * cmpl op1, op2 movl ct, dest
10026 * setcc dest jcc 1f
10027 * decl dest movl cf, dest
10028 * andl (cf-ct),dest 1:
10031 * Size 20. Size 14.
10033 * This is reasonably steep, but branch mispredict costs are
10034 * high on modern cpus, so consider failing only if optimizing
10038 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10039 && BRANCH_COST >= 2)
10045 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10046 /* We may be reversing unordered compare to normal compare,
10047 that is not valid in general (we may convert non-trapping
10048 condition to trapping one), however on i386 we currently
10049 emit all comparisons unordered. */
10050 code = reverse_condition_maybe_unordered (code);
10053 code = reverse_condition (code);
10054 if (compare_code != NIL)
10055 compare_code = reverse_condition (compare_code);
10059 if (compare_code != NIL)
10061 /* notl op1 (if needed)
10066 For x < 0 (resp. x <= -1) there will be no notl,
10067 so if possible swap the constants to get rid of the
10069 True/false will be -1/0 while code below (store flag
10070 followed by decrement) is 0/-1, so the constants need
10071 to be exchanged once more. */
10073 if (compare_code == GE || !cf)
10075 code = reverse_condition (code);
10080 HOST_WIDE_INT tmp = cf;
10085 out = emit_store_flag (out, code, ix86_compare_op0,
10086 ix86_compare_op1, VOIDmode, 0, -1);
10090 out = emit_store_flag (out, code, ix86_compare_op0,
10091 ix86_compare_op1, VOIDmode, 0, 1);
10093 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10094 copy_rtx (out), 1, OPTAB_DIRECT);
10097 out = expand_simple_binop (mode, AND, copy_rtx (out),
10098 gen_int_mode (cf - ct, mode),
10099 copy_rtx (out), 1, OPTAB_DIRECT);
10101 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10102 copy_rtx (out), 1, OPTAB_DIRECT);
10103 if (!rtx_equal_p (out, operands[0]))
10104 emit_move_insn (operands[0], copy_rtx (out));
10106 return 1; /* DONE */
10110 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10112 /* Try a few things more with specific constants and a variable. */
10115 rtx var, orig_out, out, tmp;
10117 if (BRANCH_COST <= 2)
10118 return 0; /* FAIL */
10120 /* If one of the two operands is an interesting constant, load a
10121 constant with the above and mask it in with a logical operation. */
10123 if (GET_CODE (operands[2]) == CONST_INT)
10126 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10127 operands[3] = constm1_rtx, op = and_optab;
10128 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10129 operands[3] = const0_rtx, op = ior_optab;
10131 return 0; /* FAIL */
10133 else if (GET_CODE (operands[3]) == CONST_INT)
10136 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10137 operands[2] = constm1_rtx, op = and_optab;
10138 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10139 operands[2] = const0_rtx, op = ior_optab;
10141 return 0; /* FAIL */
10144 return 0; /* FAIL */
10146 orig_out = operands[0];
10147 tmp = gen_reg_rtx (mode);
10150 /* Recurse to get the constant loaded. */
10151 if (ix86_expand_int_movcc (operands) == 0)
10152 return 0; /* FAIL */
10154 /* Mask in the interesting variable. */
10155 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10157 if (!rtx_equal_p (out, orig_out))
10158 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10160 return 1; /* DONE */
10164 * For comparison with above,
10174 if (! nonimmediate_operand (operands[2], mode))
10175 operands[2] = force_reg (mode, operands[2]);
10176 if (! nonimmediate_operand (operands[3], mode))
10177 operands[3] = force_reg (mode, operands[3]);
10179 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10181 rtx tmp = gen_reg_rtx (mode);
10182 emit_move_insn (tmp, operands[3]);
10185 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10187 rtx tmp = gen_reg_rtx (mode);
10188 emit_move_insn (tmp, operands[2]);
10192 if (! register_operand (operands[2], VOIDmode)
10194 || ! register_operand (operands[3], VOIDmode)))
10195 operands[2] = force_reg (mode, operands[2]);
10198 && ! register_operand (operands[3], VOIDmode))
10199 operands[3] = force_reg (mode, operands[3]);
10201 emit_insn (compare_seq);
10202 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10203 gen_rtx_IF_THEN_ELSE (mode,
10204 compare_op, operands[2],
10207 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10208 gen_rtx_IF_THEN_ELSE (mode,
10210 copy_rtx (operands[3]),
10211 copy_rtx (operands[0]))));
10213 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10214 gen_rtx_IF_THEN_ELSE (mode,
10216 copy_rtx (operands[2]),
10217 copy_rtx (operands[0]))));
10219 return 1; /* DONE */
10223 ix86_expand_fp_movcc (operands)
10226 enum rtx_code code;
10228 rtx compare_op, second_test, bypass_test;
10230 /* For SF/DFmode conditional moves based on comparisons
10231 in same mode, we may want to use SSE min/max instructions. */
10232 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10233 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10234 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10235 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10236 && (!TARGET_IEEE_FP
10237 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10238 /* We may be called from the post-reload splitter. */
10239 && (!REG_P (operands[0])
10240 || SSE_REG_P (operands[0])
10241 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10243 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10244 code = GET_CODE (operands[1]);
10246 /* See if we have (cross) match between comparison operands and
10247 conditional move operands. */
10248 if (rtx_equal_p (operands[2], op1))
10253 code = reverse_condition_maybe_unordered (code);
10255 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10257 /* Check for min operation. */
10258 if (code == LT || code == UNLE)
10266 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10267 if (memory_operand (op0, VOIDmode))
10268 op0 = force_reg (GET_MODE (operands[0]), op0);
10269 if (GET_MODE (operands[0]) == SFmode)
10270 emit_insn (gen_minsf3 (operands[0], op0, op1));
10272 emit_insn (gen_mindf3 (operands[0], op0, op1));
10275 /* Check for max operation. */
10276 if (code == GT || code == UNGE)
10284 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10285 if (memory_operand (op0, VOIDmode))
10286 op0 = force_reg (GET_MODE (operands[0]), op0);
10287 if (GET_MODE (operands[0]) == SFmode)
10288 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10290 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10294 /* Manage condition to be sse_comparison_operator. In case we are
10295 in non-ieee mode, try to canonicalize the destination operand
10296 to be first in the comparison - this helps reload to avoid extra
10298 if (!sse_comparison_operator (operands[1], VOIDmode)
10299 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10301 rtx tmp = ix86_compare_op0;
10302 ix86_compare_op0 = ix86_compare_op1;
10303 ix86_compare_op1 = tmp;
10304 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10305 VOIDmode, ix86_compare_op0,
10308 /* Similarly try to manage result to be first operand of conditional
10309 move. We also don't support the NE comparison on SSE, so try to
10311 if ((rtx_equal_p (operands[0], operands[3])
10312 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10313 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10315 rtx tmp = operands[2];
10316 operands[2] = operands[3];
10318 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10319 (GET_CODE (operands[1])),
10320 VOIDmode, ix86_compare_op0,
10323 if (GET_MODE (operands[0]) == SFmode)
10324 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10325 operands[2], operands[3],
10326 ix86_compare_op0, ix86_compare_op1));
10328 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10329 operands[2], operands[3],
10330 ix86_compare_op0, ix86_compare_op1));
10334 /* The floating point conditional move instructions don't directly
10335 support conditions resulting from a signed integer comparison. */
10337 code = GET_CODE (operands[1]);
10338 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10340 /* The floating point conditional move instructions don't directly
10341 support signed integer comparisons. */
10343 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10345 if (second_test != NULL || bypass_test != NULL)
10347 tmp = gen_reg_rtx (QImode);
10348 ix86_expand_setcc (code, tmp);
10350 ix86_compare_op0 = tmp;
10351 ix86_compare_op1 = const0_rtx;
10352 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10354 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10356 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10357 emit_move_insn (tmp, operands[3]);
10360 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10362 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10363 emit_move_insn (tmp, operands[2]);
10367 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10368 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10373 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10374 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10379 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10380 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10388 /* Expand conditional increment or decrement using adb/sbb instructions.
10389 The default case using setcc followed by the conditional move can be
10390 done by generic code. */
10392 ix86_expand_int_addcc (operands)
10395 enum rtx_code code = GET_CODE (operands[1]);
10397 rtx val = const0_rtx;
10398 bool fpcmp = false;
10399 enum machine_mode mode = GET_MODE (operands[0]);
10401 if (operands[3] != const1_rtx
10402 && operands[3] != constm1_rtx)
10404 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10405 ix86_compare_op1, &compare_op))
10407 code = GET_CODE (compare_op);
10409 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10410 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10413 code = ix86_fp_compare_code_to_integer (code);
10420 PUT_CODE (compare_op,
10421 reverse_condition_maybe_unordered
10422 (GET_CODE (compare_op)));
10424 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10426 PUT_MODE (compare_op, mode);
10428 /* Construct either adc or sbb insn. */
10429 if ((code == LTU) == (operands[3] == constm1_rtx))
10431 switch (GET_MODE (operands[0]))
10434 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10437 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10440 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10443 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10451 switch (GET_MODE (operands[0]))
10454 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10457 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10460 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10463 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10469 return 1; /* DONE */
10473 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10474 works for floating pointer parameters and nonoffsetable memories.
10475 For pushes, it returns just stack offsets; the values will be saved
10476 in the right order. Maximally three parts are generated. */
10479 ix86_split_to_parts (operand, parts, mode)
10482 enum machine_mode mode;
10487 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10489 size = (GET_MODE_SIZE (mode) + 4) / 8;
10491 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10493 if (size < 2 || size > 3)
10496 /* Optimize constant pool reference to immediates. This is used by fp
10497 moves, that force all constants to memory to allow combining. */
10498 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10500 rtx tmp = maybe_get_pool_constant (operand);
10505 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10507 /* The only non-offsetable memories we handle are pushes. */
10508 if (! push_operand (operand, VOIDmode))
10511 operand = copy_rtx (operand);
10512 PUT_MODE (operand, Pmode);
10513 parts[0] = parts[1] = parts[2] = operand;
10515 else if (!TARGET_64BIT)
10517 if (mode == DImode)
10518 split_di (&operand, 1, &parts[0], &parts[1]);
10521 if (REG_P (operand))
10523 if (!reload_completed)
10525 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10526 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10528 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10530 else if (offsettable_memref_p (operand))
10532 operand = adjust_address (operand, SImode, 0);
10533 parts[0] = operand;
10534 parts[1] = adjust_address (operand, SImode, 4);
10536 parts[2] = adjust_address (operand, SImode, 8);
10538 else if (GET_CODE (operand) == CONST_DOUBLE)
10543 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10548 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10549 parts[2] = gen_int_mode (l[2], SImode);
10552 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10557 parts[1] = gen_int_mode (l[1], SImode);
10558 parts[0] = gen_int_mode (l[0], SImode);
10566 if (mode == TImode)
10567 split_ti (&operand, 1, &parts[0], &parts[1]);
10568 if (mode == XFmode || mode == TFmode)
10570 if (REG_P (operand))
10572 if (!reload_completed)
10574 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10575 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10577 else if (offsettable_memref_p (operand))
10579 operand = adjust_address (operand, DImode, 0);
10580 parts[0] = operand;
10581 parts[1] = adjust_address (operand, SImode, 8);
10583 else if (GET_CODE (operand) == CONST_DOUBLE)
10588 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10589 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10590 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10591 if (HOST_BITS_PER_WIDE_INT >= 64)
10594 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10595 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10598 parts[0] = immed_double_const (l[0], l[1], DImode);
10599 parts[1] = gen_int_mode (l[2], SImode);
10609 /* Emit insns to perform a move or push of DI, DF, and XF values.
10610 Return false when normal moves are needed; true when all required
10611 insns have been emitted. Operands 2-4 contain the input values
10612 int the correct order; operands 5-7 contain the output values. */
10615 ix86_split_long_move (operands)
10621 int collisions = 0;
10622 enum machine_mode mode = GET_MODE (operands[0]);
10624 /* The DFmode expanders may ask us to move double.
10625 For 64bit target this is single move. By hiding the fact
10626 here we simplify i386.md splitters. */
10627 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10629 /* Optimize constant pool reference to immediates. This is used by
10630 fp moves, that force all constants to memory to allow combining. */
10632 if (GET_CODE (operands[1]) == MEM
10633 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10634 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10635 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10636 if (push_operand (operands[0], VOIDmode))
10638 operands[0] = copy_rtx (operands[0]);
10639 PUT_MODE (operands[0], Pmode);
10642 operands[0] = gen_lowpart (DImode, operands[0]);
10643 operands[1] = gen_lowpart (DImode, operands[1]);
10644 emit_move_insn (operands[0], operands[1]);
10648 /* The only non-offsettable memory we handle is push. */
10649 if (push_operand (operands[0], VOIDmode))
10651 else if (GET_CODE (operands[0]) == MEM
10652 && ! offsettable_memref_p (operands[0]))
10655 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10656 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10658 /* When emitting push, take care for source operands on the stack. */
10659 if (push && GET_CODE (operands[1]) == MEM
10660 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10663 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10664 XEXP (part[1][2], 0));
10665 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10666 XEXP (part[1][1], 0));
10669 /* We need to do copy in the right order in case an address register
10670 of the source overlaps the destination. */
10671 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10673 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10675 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10678 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10681 /* Collision in the middle part can be handled by reordering. */
10682 if (collisions == 1 && nparts == 3
10683 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10686 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10687 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10690 /* If there are more collisions, we can't handle it by reordering.
10691 Do an lea to the last part and use only one colliding move. */
10692 else if (collisions > 1)
10695 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10696 XEXP (part[1][0], 0)));
10697 part[1][0] = change_address (part[1][0],
10698 TARGET_64BIT ? DImode : SImode,
10699 part[0][nparts - 1]);
10700 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10702 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10712 /* We use only first 12 bytes of TFmode value, but for pushing we
10713 are required to adjust stack as if we were pushing real 16byte
10715 if (mode == TFmode && !TARGET_64BIT)
10716 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10718 emit_move_insn (part[0][2], part[1][2]);
10723 /* In 64bit mode we don't have 32bit push available. In case this is
10724 register, it is OK - we will just use larger counterpart. We also
10725 retype memory - these comes from attempt to avoid REX prefix on
10726 moving of second half of TFmode value. */
10727 if (GET_MODE (part[1][1]) == SImode)
10729 if (GET_CODE (part[1][1]) == MEM)
10730 part[1][1] = adjust_address (part[1][1], DImode, 0);
10731 else if (REG_P (part[1][1]))
10732 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10735 if (GET_MODE (part[1][0]) == SImode)
10736 part[1][0] = part[1][1];
10739 emit_move_insn (part[0][1], part[1][1]);
10740 emit_move_insn (part[0][0], part[1][0]);
10744 /* Choose correct order to not overwrite the source before it is copied. */
10745 if ((REG_P (part[0][0])
10746 && REG_P (part[1][1])
10747 && (REGNO (part[0][0]) == REGNO (part[1][1])
10749 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10751 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10755 operands[2] = part[0][2];
10756 operands[3] = part[0][1];
10757 operands[4] = part[0][0];
10758 operands[5] = part[1][2];
10759 operands[6] = part[1][1];
10760 operands[7] = part[1][0];
10764 operands[2] = part[0][1];
10765 operands[3] = part[0][0];
10766 operands[5] = part[1][1];
10767 operands[6] = part[1][0];
10774 operands[2] = part[0][0];
10775 operands[3] = part[0][1];
10776 operands[4] = part[0][2];
10777 operands[5] = part[1][0];
10778 operands[6] = part[1][1];
10779 operands[7] = part[1][2];
10783 operands[2] = part[0][0];
10784 operands[3] = part[0][1];
10785 operands[5] = part[1][0];
10786 operands[6] = part[1][1];
10789 emit_move_insn (operands[2], operands[5]);
10790 emit_move_insn (operands[3], operands[6]);
10792 emit_move_insn (operands[4], operands[7]);
10798 ix86_split_ashldi (operands, scratch)
10799 rtx *operands, scratch;
10801 rtx low[2], high[2];
10804 if (GET_CODE (operands[2]) == CONST_INT)
10806 split_di (operands, 2, low, high);
10807 count = INTVAL (operands[2]) & 63;
10811 emit_move_insn (high[0], low[1]);
10812 emit_move_insn (low[0], const0_rtx);
10815 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10819 if (!rtx_equal_p (operands[0], operands[1]))
10820 emit_move_insn (operands[0], operands[1]);
10821 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10822 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10827 if (!rtx_equal_p (operands[0], operands[1]))
10828 emit_move_insn (operands[0], operands[1]);
10830 split_di (operands, 1, low, high);
10832 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10833 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10835 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10837 if (! no_new_pseudos)
10838 scratch = force_reg (SImode, const0_rtx);
10840 emit_move_insn (scratch, const0_rtx);
10842 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10846 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10851 ix86_split_ashrdi (operands, scratch)
10852 rtx *operands, scratch;
10854 rtx low[2], high[2];
10857 if (GET_CODE (operands[2]) == CONST_INT)
10859 split_di (operands, 2, low, high);
10860 count = INTVAL (operands[2]) & 63;
10864 emit_move_insn (low[0], high[1]);
10866 if (! reload_completed)
10867 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10870 emit_move_insn (high[0], low[0]);
10871 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10875 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10879 if (!rtx_equal_p (operands[0], operands[1]))
10880 emit_move_insn (operands[0], operands[1]);
10881 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10882 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10887 if (!rtx_equal_p (operands[0], operands[1]))
10888 emit_move_insn (operands[0], operands[1]);
10890 split_di (operands, 1, low, high);
10892 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10893 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10895 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10897 if (! no_new_pseudos)
10898 scratch = gen_reg_rtx (SImode);
10899 emit_move_insn (scratch, high[0]);
10900 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10901 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10905 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10910 ix86_split_lshrdi (operands, scratch)
10911 rtx *operands, scratch;
10913 rtx low[2], high[2];
10916 if (GET_CODE (operands[2]) == CONST_INT)
10918 split_di (operands, 2, low, high);
10919 count = INTVAL (operands[2]) & 63;
10923 emit_move_insn (low[0], high[1]);
10924 emit_move_insn (high[0], const0_rtx);
10927 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10931 if (!rtx_equal_p (operands[0], operands[1]))
10932 emit_move_insn (operands[0], operands[1]);
10933 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10934 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10939 if (!rtx_equal_p (operands[0], operands[1]))
10940 emit_move_insn (operands[0], operands[1]);
10942 split_di (operands, 1, low, high);
10944 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10945 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10947 /* Heh. By reversing the arguments, we can reuse this pattern. */
10948 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10950 if (! no_new_pseudos)
10951 scratch = force_reg (SImode, const0_rtx);
10953 emit_move_insn (scratch, const0_rtx);
10955 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10959 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10963 /* Helper function for the string operations below. Dest VARIABLE whether
10964 it is aligned to VALUE bytes. If true, jump to the label. */
10966 ix86_expand_aligntest (variable, value)
10970 rtx label = gen_label_rtx ();
10971 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10972 if (GET_MODE (variable) == DImode)
10973 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10975 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10976 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10981 /* Adjust COUNTER by the VALUE. */
10983 ix86_adjust_counter (countreg, value)
10985 HOST_WIDE_INT value;
10987 if (GET_MODE (countreg) == DImode)
10988 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10990 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10993 /* Zero extend possibly SImode EXP to Pmode register. */
10995 ix86_zero_extend_to_Pmode (exp)
10999 if (GET_MODE (exp) == VOIDmode)
11000 return force_reg (Pmode, exp);
11001 if (GET_MODE (exp) == Pmode)
11002 return copy_to_mode_reg (Pmode, exp);
11003 r = gen_reg_rtx (Pmode);
11004 emit_insn (gen_zero_extendsidi2 (r, exp));
11008 /* Expand string move (memcpy) operation. Use i386 string operations when
11009 profitable. expand_clrstr contains similar code. */
11011 ix86_expand_movstr (dst, src, count_exp, align_exp)
11012 rtx dst, src, count_exp, align_exp;
11014 rtx srcreg, destreg, countreg;
11015 enum machine_mode counter_mode;
11016 HOST_WIDE_INT align = 0;
11017 unsigned HOST_WIDE_INT count = 0;
11020 if (GET_CODE (align_exp) == CONST_INT)
11021 align = INTVAL (align_exp);
11023 /* Can't use any of this if the user has appropriated esi or edi. */
11024 if (global_regs[4] || global_regs[5])
11027 /* This simple hack avoids all inlining code and simplifies code below. */
11028 if (!TARGET_ALIGN_STRINGOPS)
11031 if (GET_CODE (count_exp) == CONST_INT)
11033 count = INTVAL (count_exp);
11034 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11038 /* Figure out proper mode for counter. For 32bits it is always SImode,
11039 for 64bits use SImode when possible, otherwise DImode.
11040 Set count to number of bytes copied when known at compile time. */
11041 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11042 || x86_64_zero_extended_value (count_exp))
11043 counter_mode = SImode;
11045 counter_mode = DImode;
11049 if (counter_mode != SImode && counter_mode != DImode)
11052 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11053 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11055 emit_insn (gen_cld ());
11057 /* When optimizing for size emit simple rep ; movsb instruction for
11058 counts not divisible by 4. */
11060 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11062 countreg = ix86_zero_extend_to_Pmode (count_exp);
11064 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11065 destreg, srcreg, countreg));
11067 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11068 destreg, srcreg, countreg));
11071 /* For constant aligned (or small unaligned) copies use rep movsl
11072 followed by code copying the rest. For PentiumPro ensure 8 byte
11073 alignment to allow rep movsl acceleration. */
11075 else if (count != 0
11077 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11078 || optimize_size || count < (unsigned int) 64))
11080 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11081 if (count & ~(size - 1))
11083 countreg = copy_to_mode_reg (counter_mode,
11084 GEN_INT ((count >> (size == 4 ? 2 : 3))
11085 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11086 countreg = ix86_zero_extend_to_Pmode (countreg);
11090 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11091 destreg, srcreg, countreg));
11093 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11094 destreg, srcreg, countreg));
11097 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11098 destreg, srcreg, countreg));
11100 if (size == 8 && (count & 0x04))
11101 emit_insn (gen_strmovsi (destreg, srcreg));
11103 emit_insn (gen_strmovhi (destreg, srcreg));
11105 emit_insn (gen_strmovqi (destreg, srcreg));
11107 /* The generic code based on the glibc implementation:
11108 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11109 allowing accelerated copying there)
11110 - copy the data using rep movsl
11111 - copy the rest. */
11116 int desired_alignment = (TARGET_PENTIUMPRO
11117 && (count == 0 || count >= (unsigned int) 260)
11118 ? 8 : UNITS_PER_WORD);
11120 /* In case we don't know anything about the alignment, default to
11121 library version, since it is usually equally fast and result in
11124 Also emit call when we know that the count is large and call overhead
11125 will not be important. */
11126 if (!TARGET_INLINE_ALL_STRINGOPS
11127 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11133 if (TARGET_SINGLE_STRINGOP)
11134 emit_insn (gen_cld ());
11136 countreg2 = gen_reg_rtx (Pmode);
11137 countreg = copy_to_mode_reg (counter_mode, count_exp);
11139 /* We don't use loops to align destination and to copy parts smaller
11140 than 4 bytes, because gcc is able to optimize such code better (in
11141 the case the destination or the count really is aligned, gcc is often
11142 able to predict the branches) and also it is friendlier to the
11143 hardware branch prediction.
11145 Using loops is beneficial for generic case, because we can
11146 handle small counts using the loops. Many CPUs (such as Athlon)
11147 have large REP prefix setup costs.
11149 This is quite costly. Maybe we can revisit this decision later or
11150 add some customizability to this code. */
11152 if (count == 0 && align < desired_alignment)
11154 label = gen_label_rtx ();
11155 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11156 LEU, 0, counter_mode, 1, label);
11160 rtx label = ix86_expand_aligntest (destreg, 1);
11161 emit_insn (gen_strmovqi (destreg, srcreg));
11162 ix86_adjust_counter (countreg, 1);
11163 emit_label (label);
11164 LABEL_NUSES (label) = 1;
11168 rtx label = ix86_expand_aligntest (destreg, 2);
11169 emit_insn (gen_strmovhi (destreg, srcreg));
11170 ix86_adjust_counter (countreg, 2);
11171 emit_label (label);
11172 LABEL_NUSES (label) = 1;
11174 if (align <= 4 && desired_alignment > 4)
11176 rtx label = ix86_expand_aligntest (destreg, 4);
11177 emit_insn (gen_strmovsi (destreg, srcreg));
11178 ix86_adjust_counter (countreg, 4);
11179 emit_label (label);
11180 LABEL_NUSES (label) = 1;
11183 if (label && desired_alignment > 4 && !TARGET_64BIT)
11185 emit_label (label);
11186 LABEL_NUSES (label) = 1;
11189 if (!TARGET_SINGLE_STRINGOP)
11190 emit_insn (gen_cld ());
11193 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11195 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11196 destreg, srcreg, countreg2));
11200 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11201 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11202 destreg, srcreg, countreg2));
11207 emit_label (label);
11208 LABEL_NUSES (label) = 1;
11210 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11211 emit_insn (gen_strmovsi (destreg, srcreg));
11212 if ((align <= 4 || count == 0) && TARGET_64BIT)
11214 rtx label = ix86_expand_aligntest (countreg, 4);
11215 emit_insn (gen_strmovsi (destreg, srcreg));
11216 emit_label (label);
11217 LABEL_NUSES (label) = 1;
11219 if (align > 2 && count != 0 && (count & 2))
11220 emit_insn (gen_strmovhi (destreg, srcreg));
11221 if (align <= 2 || count == 0)
11223 rtx label = ix86_expand_aligntest (countreg, 2);
11224 emit_insn (gen_strmovhi (destreg, srcreg));
11225 emit_label (label);
11226 LABEL_NUSES (label) = 1;
11228 if (align > 1 && count != 0 && (count & 1))
11229 emit_insn (gen_strmovqi (destreg, srcreg));
11230 if (align <= 1 || count == 0)
11232 rtx label = ix86_expand_aligntest (countreg, 1);
11233 emit_insn (gen_strmovqi (destreg, srcreg));
11234 emit_label (label);
11235 LABEL_NUSES (label) = 1;
11239 insns = get_insns ();
11242 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11247 /* Expand string clear operation (bzero). Use i386 string operations when
11248 profitable. expand_movstr contains similar code. */
11250 ix86_expand_clrstr (src, count_exp, align_exp)
11251 rtx src, count_exp, align_exp;
11253 rtx destreg, zeroreg, countreg;
11254 enum machine_mode counter_mode;
11255 HOST_WIDE_INT align = 0;
11256 unsigned HOST_WIDE_INT count = 0;
11258 if (GET_CODE (align_exp) == CONST_INT)
11259 align = INTVAL (align_exp);
11261 /* Can't use any of this if the user has appropriated esi. */
11262 if (global_regs[4])
11265 /* This simple hack avoids all inlining code and simplifies code below. */
11266 if (!TARGET_ALIGN_STRINGOPS)
11269 if (GET_CODE (count_exp) == CONST_INT)
11271 count = INTVAL (count_exp);
11272 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11275 /* Figure out proper mode for counter. For 32bits it is always SImode,
11276 for 64bits use SImode when possible, otherwise DImode.
11277 Set count to number of bytes copied when known at compile time. */
11278 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11279 || x86_64_zero_extended_value (count_exp))
11280 counter_mode = SImode;
11282 counter_mode = DImode;
11284 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11286 emit_insn (gen_cld ());
11288 /* When optimizing for size emit simple rep ; movsb instruction for
11289 counts not divisible by 4. */
11291 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11293 countreg = ix86_zero_extend_to_Pmode (count_exp);
11294 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11296 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11297 destreg, countreg));
11299 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11300 destreg, countreg));
11302 else if (count != 0
11304 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11305 || optimize_size || count < (unsigned int) 64))
11307 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11308 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11309 if (count & ~(size - 1))
11311 countreg = copy_to_mode_reg (counter_mode,
11312 GEN_INT ((count >> (size == 4 ? 2 : 3))
11313 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11314 countreg = ix86_zero_extend_to_Pmode (countreg);
11318 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11319 destreg, countreg));
11321 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11322 destreg, countreg));
11325 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11326 destreg, countreg));
11328 if (size == 8 && (count & 0x04))
11329 emit_insn (gen_strsetsi (destreg,
11330 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11332 emit_insn (gen_strsethi (destreg,
11333 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11335 emit_insn (gen_strsetqi (destreg,
11336 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11342 /* Compute desired alignment of the string operation. */
11343 int desired_alignment = (TARGET_PENTIUMPRO
11344 && (count == 0 || count >= (unsigned int) 260)
11345 ? 8 : UNITS_PER_WORD);
11347 /* In case we don't know anything about the alignment, default to
11348 library version, since it is usually equally fast and result in
11351 Also emit call when we know that the count is large and call overhead
11352 will not be important. */
11353 if (!TARGET_INLINE_ALL_STRINGOPS
11354 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11357 if (TARGET_SINGLE_STRINGOP)
11358 emit_insn (gen_cld ());
11360 countreg2 = gen_reg_rtx (Pmode);
11361 countreg = copy_to_mode_reg (counter_mode, count_exp);
11362 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11364 if (count == 0 && align < desired_alignment)
11366 label = gen_label_rtx ();
11367 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11368 LEU, 0, counter_mode, 1, label);
11372 rtx label = ix86_expand_aligntest (destreg, 1);
11373 emit_insn (gen_strsetqi (destreg,
11374 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11375 ix86_adjust_counter (countreg, 1);
11376 emit_label (label);
11377 LABEL_NUSES (label) = 1;
11381 rtx label = ix86_expand_aligntest (destreg, 2);
11382 emit_insn (gen_strsethi (destreg,
11383 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11384 ix86_adjust_counter (countreg, 2);
11385 emit_label (label);
11386 LABEL_NUSES (label) = 1;
11388 if (align <= 4 && desired_alignment > 4)
11390 rtx label = ix86_expand_aligntest (destreg, 4);
11391 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11392 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11394 ix86_adjust_counter (countreg, 4);
11395 emit_label (label);
11396 LABEL_NUSES (label) = 1;
11399 if (label && desired_alignment > 4 && !TARGET_64BIT)
11401 emit_label (label);
11402 LABEL_NUSES (label) = 1;
11406 if (!TARGET_SINGLE_STRINGOP)
11407 emit_insn (gen_cld ());
11410 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11412 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11413 destreg, countreg2));
11417 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11418 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11419 destreg, countreg2));
11423 emit_label (label);
11424 LABEL_NUSES (label) = 1;
11427 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11428 emit_insn (gen_strsetsi (destreg,
11429 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11430 if (TARGET_64BIT && (align <= 4 || count == 0))
11432 rtx label = ix86_expand_aligntest (countreg, 4);
11433 emit_insn (gen_strsetsi (destreg,
11434 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11435 emit_label (label);
11436 LABEL_NUSES (label) = 1;
11438 if (align > 2 && count != 0 && (count & 2))
11439 emit_insn (gen_strsethi (destreg,
11440 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11441 if (align <= 2 || count == 0)
11443 rtx label = ix86_expand_aligntest (countreg, 2);
11444 emit_insn (gen_strsethi (destreg,
11445 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11446 emit_label (label);
11447 LABEL_NUSES (label) = 1;
11449 if (align > 1 && count != 0 && (count & 1))
11450 emit_insn (gen_strsetqi (destreg,
11451 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11452 if (align <= 1 || count == 0)
11454 rtx label = ix86_expand_aligntest (countreg, 1);
11455 emit_insn (gen_strsetqi (destreg,
11456 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11457 emit_label (label);
11458 LABEL_NUSES (label) = 1;
11463 /* Expand strlen. */
11465 ix86_expand_strlen (out, src, eoschar, align)
11466 rtx out, src, eoschar, align;
11468 rtx addr, scratch1, scratch2, scratch3, scratch4;
11470 /* The generic case of strlen expander is long. Avoid it's
11471 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11473 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11474 && !TARGET_INLINE_ALL_STRINGOPS
11476 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11479 addr = force_reg (Pmode, XEXP (src, 0));
11480 scratch1 = gen_reg_rtx (Pmode);
11482 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11485 /* Well it seems that some optimizer does not combine a call like
11486 foo(strlen(bar), strlen(bar));
11487 when the move and the subtraction is done here. It does calculate
11488 the length just once when these instructions are done inside of
11489 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11490 often used and I use one fewer register for the lifetime of
11491 output_strlen_unroll() this is better. */
11493 emit_move_insn (out, addr);
11495 ix86_expand_strlensi_unroll_1 (out, align);
11497 /* strlensi_unroll_1 returns the address of the zero at the end of
11498 the string, like memchr(), so compute the length by subtracting
11499 the start address. */
11501 emit_insn (gen_subdi3 (out, out, addr));
11503 emit_insn (gen_subsi3 (out, out, addr));
11507 scratch2 = gen_reg_rtx (Pmode);
11508 scratch3 = gen_reg_rtx (Pmode);
11509 scratch4 = force_reg (Pmode, constm1_rtx);
11511 emit_move_insn (scratch3, addr);
11512 eoschar = force_reg (QImode, eoschar);
11514 emit_insn (gen_cld ());
11517 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11518 align, scratch4, scratch3));
11519 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11520 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11524 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11525 align, scratch4, scratch3));
11526 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11527 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11533 /* Expand the appropriate insns for doing strlen if not just doing
11536 out = result, initialized with the start address
11537 align_rtx = alignment of the address.
11538 scratch = scratch register, initialized with the startaddress when
11539 not aligned, otherwise undefined
11541 This is just the body. It needs the initialisations mentioned above and
11542 some address computing at the end. These things are done in i386.md. */
11545 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11546 rtx out, align_rtx;
11550 rtx align_2_label = NULL_RTX;
11551 rtx align_3_label = NULL_RTX;
11552 rtx align_4_label = gen_label_rtx ();
11553 rtx end_0_label = gen_label_rtx ();
11555 rtx tmpreg = gen_reg_rtx (SImode);
11556 rtx scratch = gen_reg_rtx (SImode);
11560 if (GET_CODE (align_rtx) == CONST_INT)
11561 align = INTVAL (align_rtx);
11563 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11565 /* Is there a known alignment and is it less than 4? */
11568 rtx scratch1 = gen_reg_rtx (Pmode);
11569 emit_move_insn (scratch1, out);
11570 /* Is there a known alignment and is it not 2? */
11573 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11574 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11576 /* Leave just the 3 lower bits. */
11577 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11578 NULL_RTX, 0, OPTAB_WIDEN);
11580 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11581 Pmode, 1, align_4_label);
11582 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11583 Pmode, 1, align_2_label);
11584 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11585 Pmode, 1, align_3_label);
11589 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11590 check if is aligned to 4 - byte. */
11592 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11593 NULL_RTX, 0, OPTAB_WIDEN);
11595 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11596 Pmode, 1, align_4_label);
11599 mem = gen_rtx_MEM (QImode, out);
11601 /* Now compare the bytes. */
11603 /* Compare the first n unaligned byte on a byte per byte basis. */
11604 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11605 QImode, 1, end_0_label);
11607 /* Increment the address. */
11609 emit_insn (gen_adddi3 (out, out, const1_rtx));
11611 emit_insn (gen_addsi3 (out, out, const1_rtx));
11613 /* Not needed with an alignment of 2 */
11616 emit_label (align_2_label);
11618 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11622 emit_insn (gen_adddi3 (out, out, const1_rtx));
11624 emit_insn (gen_addsi3 (out, out, const1_rtx));
11626 emit_label (align_3_label);
11629 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11633 emit_insn (gen_adddi3 (out, out, const1_rtx));
11635 emit_insn (gen_addsi3 (out, out, const1_rtx));
11638 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11639 align this loop. It gives only huge programs, but does not help to
11641 emit_label (align_4_label);
11643 mem = gen_rtx_MEM (SImode, out);
11644 emit_move_insn (scratch, mem);
11646 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11648 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11650 /* This formula yields a nonzero result iff one of the bytes is zero.
11651 This saves three branches inside loop and many cycles. */
11653 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11654 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11655 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11656 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11657 gen_int_mode (0x80808080, SImode)));
11658 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11663 rtx reg = gen_reg_rtx (SImode);
11664 rtx reg2 = gen_reg_rtx (Pmode);
11665 emit_move_insn (reg, tmpreg);
11666 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11668 /* If zero is not in the first two bytes, move two bytes forward. */
11669 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11670 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11671 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11672 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11673 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11676 /* Emit lea manually to avoid clobbering of flags. */
11677 emit_insn (gen_rtx_SET (SImode, reg2,
11678 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11680 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11681 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11682 emit_insn (gen_rtx_SET (VOIDmode, out,
11683 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11690 rtx end_2_label = gen_label_rtx ();
11691 /* Is zero in the first two bytes? */
11693 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11694 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11695 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11696 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11697 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11699 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11700 JUMP_LABEL (tmp) = end_2_label;
11702 /* Not in the first two. Move two bytes forward. */
11703 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11705 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11707 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11709 emit_label (end_2_label);
11713 /* Avoid branch in fixing the byte. */
11714 tmpreg = gen_lowpart (QImode, tmpreg);
11715 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11716 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11718 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11720 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11722 emit_label (end_0_label);
11726 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11727 rtx retval, fnaddr, callarg1, callarg2, pop;
11730 rtx use = NULL, call;
11732 if (pop == const0_rtx)
11734 if (TARGET_64BIT && pop)
11738 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11739 fnaddr = machopic_indirect_call_target (fnaddr);
11741 /* Static functions and indirect calls don't need the pic register. */
11742 if (! TARGET_64BIT && flag_pic
11743 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11744 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11745 use_reg (&use, pic_offset_table_rtx);
11747 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11749 rtx al = gen_rtx_REG (QImode, 0);
11750 emit_move_insn (al, callarg2);
11751 use_reg (&use, al);
11753 #endif /* TARGET_MACHO */
11755 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11757 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11758 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11760 if (sibcall && TARGET_64BIT
11761 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11764 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11765 fnaddr = gen_rtx_REG (Pmode, 40);
11766 emit_move_insn (fnaddr, addr);
11767 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11770 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11772 call = gen_rtx_SET (VOIDmode, retval, call);
11775 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11776 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11777 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11780 call = emit_call_insn (call);
11782 CALL_INSN_FUNCTION_USAGE (call) = use;
11786 /* Clear stack slot assignments remembered from previous functions.
11787 This is called from INIT_EXPANDERS once before RTL is emitted for each
11790 static struct machine_function *
11791 ix86_init_machine_status ()
11793 struct machine_function *f;
11795 f = ggc_alloc_cleared (sizeof (struct machine_function));
11796 f->use_fast_prologue_epilogue_nregs = -1;
11801 /* Return a MEM corresponding to a stack slot with mode MODE.
11802 Allocate a new slot if necessary.
11804 The RTL for a function can have several slots available: N is
11805 which slot to use. */
11808 assign_386_stack_local (mode, n)
11809 enum machine_mode mode;
11812 struct stack_local_entry *s;
11814 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11817 for (s = ix86_stack_locals; s; s = s->next)
11818 if (s->mode == mode && s->n == n)
11821 s = (struct stack_local_entry *)
11822 ggc_alloc (sizeof (struct stack_local_entry));
11825 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11827 s->next = ix86_stack_locals;
11828 ix86_stack_locals = s;
11832 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11834 static GTY(()) rtx ix86_tls_symbol;
11836 ix86_tls_get_addr ()
11839 if (!ix86_tls_symbol)
11841 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11842 (TARGET_GNU_TLS && !TARGET_64BIT)
11843 ? "___tls_get_addr"
11844 : "__tls_get_addr");
11847 return ix86_tls_symbol;
11850 /* Calculate the length of the memory address in the instruction
11851 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11854 memory_address_length (addr)
11857 struct ix86_address parts;
11858 rtx base, index, disp;
11861 if (GET_CODE (addr) == PRE_DEC
11862 || GET_CODE (addr) == POST_INC
11863 || GET_CODE (addr) == PRE_MODIFY
11864 || GET_CODE (addr) == POST_MODIFY)
11867 if (! ix86_decompose_address (addr, &parts))
11871 index = parts.index;
11875 /* Register Indirect. */
11876 if (base && !index && !disp)
11878 /* Special cases: ebp and esp need the two-byte modrm form. */
11879 if (addr == stack_pointer_rtx
11880 || addr == arg_pointer_rtx
11881 || addr == frame_pointer_rtx
11882 || addr == hard_frame_pointer_rtx)
11886 /* Direct Addressing. */
11887 else if (disp && !base && !index)
11892 /* Find the length of the displacement constant. */
11895 if (GET_CODE (disp) == CONST_INT
11896 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11903 /* An index requires the two-byte modrm form. */
11911 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11912 is set, expect that insn have 8bit immediate alternative. */
11914 ix86_attr_length_immediate_default (insn, shortform)
11920 extract_insn_cached (insn);
11921 for (i = recog_data.n_operands - 1; i >= 0; --i)
11922 if (CONSTANT_P (recog_data.operand[i]))
11927 && GET_CODE (recog_data.operand[i]) == CONST_INT
11928 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11932 switch (get_attr_mode (insn))
11943 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11948 fatal_insn ("unknown insn mode", insn);
11954 /* Compute default value for "length_address" attribute. */
11956 ix86_attr_length_address_default (insn)
11961 if (get_attr_type (insn) == TYPE_LEA)
11963 rtx set = PATTERN (insn);
11964 if (GET_CODE (set) == SET)
11966 else if (GET_CODE (set) == PARALLEL
11967 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11968 set = XVECEXP (set, 0, 0);
11971 #ifdef ENABLE_CHECKING
11977 return memory_address_length (SET_SRC (set));
11980 extract_insn_cached (insn);
11981 for (i = recog_data.n_operands - 1; i >= 0; --i)
11982 if (GET_CODE (recog_data.operand[i]) == MEM)
11984 return memory_address_length (XEXP (recog_data.operand[i], 0));
11990 /* Return the maximum number of instructions a cpu can issue. */
11997 case PROCESSOR_PENTIUM:
12001 case PROCESSOR_PENTIUMPRO:
12002 case PROCESSOR_PENTIUM4:
12003 case PROCESSOR_ATHLON:
12012 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12013 by DEP_INSN and nothing set by DEP_INSN. */
12016 ix86_flags_dependant (insn, dep_insn, insn_type)
12017 rtx insn, dep_insn;
12018 enum attr_type insn_type;
12022 /* Simplify the test for uninteresting insns. */
12023 if (insn_type != TYPE_SETCC
12024 && insn_type != TYPE_ICMOV
12025 && insn_type != TYPE_FCMOV
12026 && insn_type != TYPE_IBR)
12029 if ((set = single_set (dep_insn)) != 0)
12031 set = SET_DEST (set);
12034 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12035 && XVECLEN (PATTERN (dep_insn), 0) == 2
12036 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12037 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12039 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12040 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12045 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12048 /* This test is true if the dependent insn reads the flags but
12049 not any other potentially set register. */
12050 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12053 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12059 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12060 address with operands set by DEP_INSN. */
12063 ix86_agi_dependant (insn, dep_insn, insn_type)
12064 rtx insn, dep_insn;
12065 enum attr_type insn_type;
12069 if (insn_type == TYPE_LEA
12072 addr = PATTERN (insn);
12073 if (GET_CODE (addr) == SET)
12075 else if (GET_CODE (addr) == PARALLEL
12076 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12077 addr = XVECEXP (addr, 0, 0);
12080 addr = SET_SRC (addr);
12085 extract_insn_cached (insn);
12086 for (i = recog_data.n_operands - 1; i >= 0; --i)
12087 if (GET_CODE (recog_data.operand[i]) == MEM)
12089 addr = XEXP (recog_data.operand[i], 0);
12096 return modified_in_p (addr, dep_insn);
12100 ix86_adjust_cost (insn, link, dep_insn, cost)
12101 rtx insn, link, dep_insn;
12104 enum attr_type insn_type, dep_insn_type;
12105 enum attr_memory memory, dep_memory;
12107 int dep_insn_code_number;
12109 /* Anti and output dependencies have zero cost on all CPUs. */
12110 if (REG_NOTE_KIND (link) != 0)
12113 dep_insn_code_number = recog_memoized (dep_insn);
12115 /* If we can't recognize the insns, we can't really do anything. */
12116 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12119 insn_type = get_attr_type (insn);
12120 dep_insn_type = get_attr_type (dep_insn);
12124 case PROCESSOR_PENTIUM:
12125 /* Address Generation Interlock adds a cycle of latency. */
12126 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12129 /* ??? Compares pair with jump/setcc. */
12130 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12133 /* Floating point stores require value to be ready one cycle earlier. */
12134 if (insn_type == TYPE_FMOV
12135 && get_attr_memory (insn) == MEMORY_STORE
12136 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12140 case PROCESSOR_PENTIUMPRO:
12141 memory = get_attr_memory (insn);
12142 dep_memory = get_attr_memory (dep_insn);
12144 /* Since we can't represent delayed latencies of load+operation,
12145 increase the cost here for non-imov insns. */
12146 if (dep_insn_type != TYPE_IMOV
12147 && dep_insn_type != TYPE_FMOV
12148 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12151 /* INT->FP conversion is expensive. */
12152 if (get_attr_fp_int_src (dep_insn))
12155 /* There is one cycle extra latency between an FP op and a store. */
12156 if (insn_type == TYPE_FMOV
12157 && (set = single_set (dep_insn)) != NULL_RTX
12158 && (set2 = single_set (insn)) != NULL_RTX
12159 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12160 && GET_CODE (SET_DEST (set2)) == MEM)
12163 /* Show ability of reorder buffer to hide latency of load by executing
12164 in parallel with previous instruction in case
12165 previous instruction is not needed to compute the address. */
12166 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12167 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12169 /* Claim moves to take one cycle, as core can issue one load
12170 at time and the next load can start cycle later. */
12171 if (dep_insn_type == TYPE_IMOV
12172 || dep_insn_type == TYPE_FMOV)
12180 memory = get_attr_memory (insn);
12181 dep_memory = get_attr_memory (dep_insn);
12182 /* The esp dependency is resolved before the instruction is really
12184 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12185 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12188 /* Since we can't represent delayed latencies of load+operation,
12189 increase the cost here for non-imov insns. */
12190 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12191 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12193 /* INT->FP conversion is expensive. */
12194 if (get_attr_fp_int_src (dep_insn))
12197 /* Show ability of reorder buffer to hide latency of load by executing
12198 in parallel with previous instruction in case
12199 previous instruction is not needed to compute the address. */
12200 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12201 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12203 /* Claim moves to take one cycle, as core can issue one load
12204 at time and the next load can start cycle later. */
12205 if (dep_insn_type == TYPE_IMOV
12206 || dep_insn_type == TYPE_FMOV)
12215 case PROCESSOR_ATHLON:
12217 memory = get_attr_memory (insn);
12218 dep_memory = get_attr_memory (dep_insn);
12220 /* Show ability of reorder buffer to hide latency of load by executing
12221 in parallel with previous instruction in case
12222 previous instruction is not needed to compute the address. */
12223 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12224 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12226 /* Claim moves to take one cycle, as core can issue one load
12227 at time and the next load can start cycle later. */
12228 if (dep_insn_type == TYPE_IMOV
12229 || dep_insn_type == TYPE_FMOV)
12231 else if (cost >= 3)
12246 struct ppro_sched_data
12249 int issued_this_cycle;
12253 static enum attr_ppro_uops
12254 ix86_safe_ppro_uops (insn)
12257 if (recog_memoized (insn) >= 0)
12258 return get_attr_ppro_uops (insn);
12260 return PPRO_UOPS_MANY;
12264 ix86_dump_ppro_packet (dump)
12267 if (ix86_sched_data.ppro.decode[0])
12269 fprintf (dump, "PPRO packet: %d",
12270 INSN_UID (ix86_sched_data.ppro.decode[0]));
12271 if (ix86_sched_data.ppro.decode[1])
12272 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12273 if (ix86_sched_data.ppro.decode[2])
12274 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12275 fputc ('\n', dump);
12279 /* We're beginning a new block. Initialize data structures as necessary. */
12282 ix86_sched_init (dump, sched_verbose, veclen)
12283 FILE *dump ATTRIBUTE_UNUSED;
12284 int sched_verbose ATTRIBUTE_UNUSED;
12285 int veclen ATTRIBUTE_UNUSED;
12287 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12290 /* Shift INSN to SLOT, and shift everything else down. */
12293 ix86_reorder_insn (insnp, slot)
12300 insnp[0] = insnp[1];
12301 while (++insnp != slot);
12307 ix86_sched_reorder_ppro (ready, e_ready)
12312 enum attr_ppro_uops cur_uops;
12313 int issued_this_cycle;
12317 /* At this point .ppro.decode contains the state of the three
12318 decoders from last "cycle". That is, those insns that were
12319 actually independent. But here we're scheduling for the
12320 decoder, and we may find things that are decodable in the
12323 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12324 issued_this_cycle = 0;
12327 cur_uops = ix86_safe_ppro_uops (*insnp);
12329 /* If the decoders are empty, and we've a complex insn at the
12330 head of the priority queue, let it issue without complaint. */
12331 if (decode[0] == NULL)
12333 if (cur_uops == PPRO_UOPS_MANY)
12335 decode[0] = *insnp;
12339 /* Otherwise, search for a 2-4 uop unsn to issue. */
12340 while (cur_uops != PPRO_UOPS_FEW)
12342 if (insnp == ready)
12344 cur_uops = ix86_safe_ppro_uops (*--insnp);
12347 /* If so, move it to the head of the line. */
12348 if (cur_uops == PPRO_UOPS_FEW)
12349 ix86_reorder_insn (insnp, e_ready);
12351 /* Issue the head of the queue. */
12352 issued_this_cycle = 1;
12353 decode[0] = *e_ready--;
12356 /* Look for simple insns to fill in the other two slots. */
12357 for (i = 1; i < 3; ++i)
12358 if (decode[i] == NULL)
12360 if (ready > e_ready)
12364 cur_uops = ix86_safe_ppro_uops (*insnp);
12365 while (cur_uops != PPRO_UOPS_ONE)
12367 if (insnp == ready)
12369 cur_uops = ix86_safe_ppro_uops (*--insnp);
12372 /* Found one. Move it to the head of the queue and issue it. */
12373 if (cur_uops == PPRO_UOPS_ONE)
12375 ix86_reorder_insn (insnp, e_ready);
12376 decode[i] = *e_ready--;
12377 issued_this_cycle++;
12381 /* ??? Didn't find one. Ideally, here we would do a lazy split
12382 of 2-uop insns, issue one and queue the other. */
12386 if (issued_this_cycle == 0)
12387 issued_this_cycle = 1;
12388 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12391 /* We are about to being issuing insns for this clock cycle.
12392 Override the default sort algorithm to better slot instructions. */
12394 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12395 FILE *dump ATTRIBUTE_UNUSED;
12396 int sched_verbose ATTRIBUTE_UNUSED;
12399 int clock_var ATTRIBUTE_UNUSED;
12401 int n_ready = *n_readyp;
12402 rtx *e_ready = ready + n_ready - 1;
12404 /* Make sure to go ahead and initialize key items in
12405 ix86_sched_data if we are not going to bother trying to
12406 reorder the ready queue. */
12409 ix86_sched_data.ppro.issued_this_cycle = 1;
12418 case PROCESSOR_PENTIUMPRO:
12419 ix86_sched_reorder_ppro (ready, e_ready);
12424 return ix86_issue_rate ();
12427 /* We are about to issue INSN. Return the number of insns left on the
12428 ready queue that can be issued this cycle. */
12431 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12435 int can_issue_more;
12441 return can_issue_more - 1;
12443 case PROCESSOR_PENTIUMPRO:
12445 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12447 if (uops == PPRO_UOPS_MANY)
12450 ix86_dump_ppro_packet (dump);
12451 ix86_sched_data.ppro.decode[0] = insn;
12452 ix86_sched_data.ppro.decode[1] = NULL;
12453 ix86_sched_data.ppro.decode[2] = NULL;
12455 ix86_dump_ppro_packet (dump);
12456 ix86_sched_data.ppro.decode[0] = NULL;
12458 else if (uops == PPRO_UOPS_FEW)
12461 ix86_dump_ppro_packet (dump);
12462 ix86_sched_data.ppro.decode[0] = insn;
12463 ix86_sched_data.ppro.decode[1] = NULL;
12464 ix86_sched_data.ppro.decode[2] = NULL;
12468 for (i = 0; i < 3; ++i)
12469 if (ix86_sched_data.ppro.decode[i] == NULL)
12471 ix86_sched_data.ppro.decode[i] = insn;
12479 ix86_dump_ppro_packet (dump);
12480 ix86_sched_data.ppro.decode[0] = NULL;
12481 ix86_sched_data.ppro.decode[1] = NULL;
12482 ix86_sched_data.ppro.decode[2] = NULL;
12486 return --ix86_sched_data.ppro.issued_this_cycle;
12491 ia32_use_dfa_pipeline_interface ()
12493 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12498 /* How many alternative schedules to try. This should be as wide as the
12499 scheduling freedom in the DFA, but no wider. Making this value too
12500 large results extra work for the scheduler. */
12503 ia32_multipass_dfa_lookahead ()
12505 if (ix86_tune == PROCESSOR_PENTIUM)
12512 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12513 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12517 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12519 rtx dstref, srcref, dstreg, srcreg;
12523 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12525 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12529 /* Subroutine of above to actually do the updating by recursively walking
12533 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12535 rtx dstref, srcref, dstreg, srcreg;
12537 enum rtx_code code = GET_CODE (x);
12538 const char *format_ptr = GET_RTX_FORMAT (code);
12541 if (code == MEM && XEXP (x, 0) == dstreg)
12542 MEM_COPY_ATTRIBUTES (x, dstref);
12543 else if (code == MEM && XEXP (x, 0) == srcreg)
12544 MEM_COPY_ATTRIBUTES (x, srcref);
12546 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12548 if (*format_ptr == 'e')
12549 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12551 else if (*format_ptr == 'E')
12552 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12553 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12558 /* Compute the alignment given to a constant that is being placed in memory.
12559 EXP is the constant and ALIGN is the alignment that the object would
12561 The value of this function is used instead of that alignment to align
12565 ix86_constant_alignment (exp, align)
12569 if (TREE_CODE (exp) == REAL_CST)
12571 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12573 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12576 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12583 /* Compute the alignment for a static variable.
12584 TYPE is the data type, and ALIGN is the alignment that
12585 the object would ordinarily have. The value of this function is used
12586 instead of that alignment to align the object. */
12589 ix86_data_alignment (type, align)
12593 if (AGGREGATE_TYPE_P (type)
12594 && TYPE_SIZE (type)
12595 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12596 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12597 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12600 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12601 to 16byte boundary. */
12604 if (AGGREGATE_TYPE_P (type)
12605 && TYPE_SIZE (type)
12606 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12607 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12608 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12612 if (TREE_CODE (type) == ARRAY_TYPE)
12614 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12616 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12619 else if (TREE_CODE (type) == COMPLEX_TYPE)
12622 if (TYPE_MODE (type) == DCmode && align < 64)
12624 if (TYPE_MODE (type) == XCmode && align < 128)
12627 else if ((TREE_CODE (type) == RECORD_TYPE
12628 || TREE_CODE (type) == UNION_TYPE
12629 || TREE_CODE (type) == QUAL_UNION_TYPE)
12630 && TYPE_FIELDS (type))
12632 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12634 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12637 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12638 || TREE_CODE (type) == INTEGER_TYPE)
12640 if (TYPE_MODE (type) == DFmode && align < 64)
12642 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12649 /* Compute the alignment for a local variable.
12650 TYPE is the data type, and ALIGN is the alignment that
12651 the object would ordinarily have. The value of this macro is used
12652 instead of that alignment to align the object. */
12655 ix86_local_alignment (type, align)
12659 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12660 to 16byte boundary. */
12663 if (AGGREGATE_TYPE_P (type)
12664 && TYPE_SIZE (type)
12665 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12666 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12667 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12670 if (TREE_CODE (type) == ARRAY_TYPE)
12672 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12674 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12677 else if (TREE_CODE (type) == COMPLEX_TYPE)
12679 if (TYPE_MODE (type) == DCmode && align < 64)
12681 if (TYPE_MODE (type) == XCmode && align < 128)
12684 else if ((TREE_CODE (type) == RECORD_TYPE
12685 || TREE_CODE (type) == UNION_TYPE
12686 || TREE_CODE (type) == QUAL_UNION_TYPE)
12687 && TYPE_FIELDS (type))
12689 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12691 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12694 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12695 || TREE_CODE (type) == INTEGER_TYPE)
12698 if (TYPE_MODE (type) == DFmode && align < 64)
12700 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12706 /* Emit RTL insns to initialize the variable parts of a trampoline.
12707 FNADDR is an RTX for the address of the function's pure code.
12708 CXT is an RTX for the static chain value for the function. */
12710 x86_initialize_trampoline (tramp, fnaddr, cxt)
12711 rtx tramp, fnaddr, cxt;
12715 /* Compute offset from the end of the jmp to the target function. */
12716 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12717 plus_constant (tramp, 10),
12718 NULL_RTX, 1, OPTAB_DIRECT);
12719 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12720 gen_int_mode (0xb9, QImode));
12721 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12722 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12723 gen_int_mode (0xe9, QImode));
12724 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12729 /* Try to load address using shorter movl instead of movabs.
12730 We may want to support movq for kernel mode, but kernel does not use
12731 trampolines at the moment. */
12732 if (x86_64_zero_extended_value (fnaddr))
12734 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12735 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12736 gen_int_mode (0xbb41, HImode));
12737 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12738 gen_lowpart (SImode, fnaddr));
12743 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12744 gen_int_mode (0xbb49, HImode));
12745 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12749 /* Load static chain using movabs to r10. */
12750 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12751 gen_int_mode (0xba49, HImode));
12752 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12755 /* Jump to the r11 */
12756 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12757 gen_int_mode (0xff49, HImode));
12758 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12759 gen_int_mode (0xe3, QImode));
12761 if (offset > TRAMPOLINE_SIZE)
12765 #ifdef TRANSFER_FROM_TRAMPOLINE
12766 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12767 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12771 #define def_builtin(MASK, NAME, TYPE, CODE) \
12773 if ((MASK) & target_flags \
12774 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12775 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12776 NULL, NULL_TREE); \
12779 struct builtin_description
12781 const unsigned int mask;
12782 const enum insn_code icode;
12783 const char *const name;
12784 const enum ix86_builtins code;
12785 const enum rtx_code comparison;
12786 const unsigned int flag;
12789 /* Used for builtins that are enabled both by -msse and -msse2. */
12790 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12791 #define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12792 #define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
12794 static const struct builtin_description bdesc_comi[] =
12796 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12797 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12798 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12799 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12800 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12801 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12802 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12803 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12804 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12805 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12806 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12807 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12810 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12812 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12813 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12816 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12817 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12818 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12822 static const struct builtin_description bdesc_2arg[] =
12825 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12826 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12827 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12828 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12829 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12830 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12831 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12832 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12834 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12835 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12836 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12837 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12838 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12839 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12840 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12841 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12842 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12843 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12844 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12845 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12846 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12847 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12848 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12849 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12850 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12851 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12852 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12853 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12855 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12856 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12857 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12858 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12860 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12861 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12862 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12863 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12865 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12866 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12867 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12868 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12869 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12872 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12873 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12874 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12875 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12876 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12877 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12878 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12879 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12881 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12882 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12883 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12884 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12885 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12886 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12887 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12888 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12890 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12891 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12892 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12894 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12895 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12896 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12897 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12899 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12900 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12902 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12903 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12904 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12905 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12906 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12907 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12909 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12910 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12911 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12912 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12914 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12915 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12916 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12917 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12918 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12919 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12922 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12923 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12924 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12926 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12927 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12928 { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12930 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12931 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12932 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12933 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12934 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12935 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12937 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12938 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12939 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12940 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12941 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12942 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12944 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12945 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12946 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12947 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12949 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12950 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12963 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12964 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12965 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12966 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12967 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12968 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12969 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12970 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12971 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12972 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12973 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12974 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12975 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12976 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12977 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12978 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12979 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12980 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12981 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12983 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13007 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13008 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13009 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13010 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13011 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13012 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13013 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13014 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13071 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13072 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13073 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13074 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13076 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13078 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13079 { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13080 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13081 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
13084 static const struct builtin_description bdesc_1arg[] =
13086 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13087 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13089 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13090 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13091 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13093 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13094 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13095 { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13096 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13097 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13098 { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13100 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13105 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13113 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13118 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13120 { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13121 { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13127 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
13131 ix86_init_builtins ()
13134 ix86_init_mmx_sse_builtins ();
13137 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13138 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13141 ix86_init_mmx_sse_builtins ()
13143 const struct builtin_description * d;
13146 tree pchar_type_node = build_pointer_type (char_type_node);
13147 tree pcchar_type_node = build_pointer_type (
13148 build_type_variant (char_type_node, 1, 0));
13149 tree pfloat_type_node = build_pointer_type (float_type_node);
13150 tree pcfloat_type_node = build_pointer_type (
13151 build_type_variant (float_type_node, 1, 0));
13152 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13153 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13154 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13157 tree int_ftype_v4sf_v4sf
13158 = build_function_type_list (integer_type_node,
13159 V4SF_type_node, V4SF_type_node, NULL_TREE);
13160 tree v4si_ftype_v4sf_v4sf
13161 = build_function_type_list (V4SI_type_node,
13162 V4SF_type_node, V4SF_type_node, NULL_TREE);
13163 /* MMX/SSE/integer conversions. */
13164 tree int_ftype_v4sf
13165 = build_function_type_list (integer_type_node,
13166 V4SF_type_node, NULL_TREE);
13167 tree int64_ftype_v4sf
13168 = build_function_type_list (long_long_integer_type_node,
13169 V4SF_type_node, NULL_TREE);
13170 tree int_ftype_v8qi
13171 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13172 tree v4sf_ftype_v4sf_int
13173 = build_function_type_list (V4SF_type_node,
13174 V4SF_type_node, integer_type_node, NULL_TREE);
13175 tree v4sf_ftype_v4sf_int64
13176 = build_function_type_list (V4SF_type_node,
13177 V4SF_type_node, long_long_integer_type_node,
13179 tree v4sf_ftype_v4sf_v2si
13180 = build_function_type_list (V4SF_type_node,
13181 V4SF_type_node, V2SI_type_node, NULL_TREE);
13182 tree int_ftype_v4hi_int
13183 = build_function_type_list (integer_type_node,
13184 V4HI_type_node, integer_type_node, NULL_TREE);
13185 tree v4hi_ftype_v4hi_int_int
13186 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13187 integer_type_node, integer_type_node,
13189 /* Miscellaneous. */
13190 tree v8qi_ftype_v4hi_v4hi
13191 = build_function_type_list (V8QI_type_node,
13192 V4HI_type_node, V4HI_type_node, NULL_TREE);
13193 tree v4hi_ftype_v2si_v2si
13194 = build_function_type_list (V4HI_type_node,
13195 V2SI_type_node, V2SI_type_node, NULL_TREE);
13196 tree v4sf_ftype_v4sf_v4sf_int
13197 = build_function_type_list (V4SF_type_node,
13198 V4SF_type_node, V4SF_type_node,
13199 integer_type_node, NULL_TREE);
13200 tree v2si_ftype_v4hi_v4hi
13201 = build_function_type_list (V2SI_type_node,
13202 V4HI_type_node, V4HI_type_node, NULL_TREE);
13203 tree v4hi_ftype_v4hi_int
13204 = build_function_type_list (V4HI_type_node,
13205 V4HI_type_node, integer_type_node, NULL_TREE);
13206 tree v4hi_ftype_v4hi_di
13207 = build_function_type_list (V4HI_type_node,
13208 V4HI_type_node, long_long_unsigned_type_node,
13210 tree v2si_ftype_v2si_di
13211 = build_function_type_list (V2SI_type_node,
13212 V2SI_type_node, long_long_unsigned_type_node,
13214 tree void_ftype_void
13215 = build_function_type (void_type_node, void_list_node);
13216 tree void_ftype_unsigned
13217 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13218 tree unsigned_ftype_void
13219 = build_function_type (unsigned_type_node, void_list_node);
13221 = build_function_type (long_long_unsigned_type_node, void_list_node);
13222 tree v4sf_ftype_void
13223 = build_function_type (V4SF_type_node, void_list_node);
13224 tree v2si_ftype_v4sf
13225 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13226 /* Loads/stores. */
13227 tree void_ftype_v8qi_v8qi_pchar
13228 = build_function_type_list (void_type_node,
13229 V8QI_type_node, V8QI_type_node,
13230 pchar_type_node, NULL_TREE);
13231 tree v4sf_ftype_pcfloat
13232 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13233 /* @@@ the type is bogus */
13234 tree v4sf_ftype_v4sf_pv2si
13235 = build_function_type_list (V4SF_type_node,
13236 V4SF_type_node, pv2si_type_node, NULL_TREE);
13237 tree void_ftype_pv2si_v4sf
13238 = build_function_type_list (void_type_node,
13239 pv2si_type_node, V4SF_type_node, NULL_TREE);
13240 tree void_ftype_pfloat_v4sf
13241 = build_function_type_list (void_type_node,
13242 pfloat_type_node, V4SF_type_node, NULL_TREE);
13243 tree void_ftype_pdi_di
13244 = build_function_type_list (void_type_node,
13245 pdi_type_node, long_long_unsigned_type_node,
13247 tree void_ftype_pv2di_v2di
13248 = build_function_type_list (void_type_node,
13249 pv2di_type_node, V2DI_type_node, NULL_TREE);
13250 /* Normal vector unops. */
13251 tree v4sf_ftype_v4sf
13252 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13254 /* Normal vector binops. */
13255 tree v4sf_ftype_v4sf_v4sf
13256 = build_function_type_list (V4SF_type_node,
13257 V4SF_type_node, V4SF_type_node, NULL_TREE);
13258 tree v8qi_ftype_v8qi_v8qi
13259 = build_function_type_list (V8QI_type_node,
13260 V8QI_type_node, V8QI_type_node, NULL_TREE);
13261 tree v4hi_ftype_v4hi_v4hi
13262 = build_function_type_list (V4HI_type_node,
13263 V4HI_type_node, V4HI_type_node, NULL_TREE);
13264 tree v2si_ftype_v2si_v2si
13265 = build_function_type_list (V2SI_type_node,
13266 V2SI_type_node, V2SI_type_node, NULL_TREE);
13267 tree di_ftype_di_di
13268 = build_function_type_list (long_long_unsigned_type_node,
13269 long_long_unsigned_type_node,
13270 long_long_unsigned_type_node, NULL_TREE);
13272 tree v2si_ftype_v2sf
13273 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13274 tree v2sf_ftype_v2si
13275 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13276 tree v2si_ftype_v2si
13277 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13278 tree v2sf_ftype_v2sf
13279 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13280 tree v2sf_ftype_v2sf_v2sf
13281 = build_function_type_list (V2SF_type_node,
13282 V2SF_type_node, V2SF_type_node, NULL_TREE);
13283 tree v2si_ftype_v2sf_v2sf
13284 = build_function_type_list (V2SI_type_node,
13285 V2SF_type_node, V2SF_type_node, NULL_TREE);
13286 tree pint_type_node = build_pointer_type (integer_type_node);
13287 tree pcint_type_node = build_pointer_type (
13288 build_type_variant (integer_type_node, 1, 0));
13289 tree pdouble_type_node = build_pointer_type (double_type_node);
13290 tree pcdouble_type_node = build_pointer_type (
13291 build_type_variant (double_type_node, 1, 0));
13292 tree int_ftype_v2df_v2df
13293 = build_function_type_list (integer_type_node,
13294 V2DF_type_node, V2DF_type_node, NULL_TREE);
13297 = build_function_type (intTI_type_node, void_list_node);
13298 tree v2di_ftype_void
13299 = build_function_type (V2DI_type_node, void_list_node);
13300 tree ti_ftype_ti_ti
13301 = build_function_type_list (intTI_type_node,
13302 intTI_type_node, intTI_type_node, NULL_TREE);
13303 tree void_ftype_pcvoid
13304 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13306 = build_function_type_list (V2DI_type_node,
13307 long_long_unsigned_type_node, NULL_TREE);
13309 = build_function_type_list (long_long_unsigned_type_node,
13310 V2DI_type_node, NULL_TREE);
13311 tree v4sf_ftype_v4si
13312 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13313 tree v4si_ftype_v4sf
13314 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13315 tree v2df_ftype_v4si
13316 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13317 tree v4si_ftype_v2df
13318 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13319 tree v2si_ftype_v2df
13320 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13321 tree v4sf_ftype_v2df
13322 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13323 tree v2df_ftype_v2si
13324 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13325 tree v2df_ftype_v4sf
13326 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13327 tree int_ftype_v2df
13328 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13329 tree int64_ftype_v2df
13330 = build_function_type_list (long_long_integer_type_node,
13331 V2DF_type_node, NULL_TREE);
13332 tree v2df_ftype_v2df_int
13333 = build_function_type_list (V2DF_type_node,
13334 V2DF_type_node, integer_type_node, NULL_TREE);
13335 tree v2df_ftype_v2df_int64
13336 = build_function_type_list (V2DF_type_node,
13337 V2DF_type_node, long_long_integer_type_node,
13339 tree v4sf_ftype_v4sf_v2df
13340 = build_function_type_list (V4SF_type_node,
13341 V4SF_type_node, V2DF_type_node, NULL_TREE);
13342 tree v2df_ftype_v2df_v4sf
13343 = build_function_type_list (V2DF_type_node,
13344 V2DF_type_node, V4SF_type_node, NULL_TREE);
13345 tree v2df_ftype_v2df_v2df_int
13346 = build_function_type_list (V2DF_type_node,
13347 V2DF_type_node, V2DF_type_node,
13350 tree v2df_ftype_v2df_pv2si
13351 = build_function_type_list (V2DF_type_node,
13352 V2DF_type_node, pv2si_type_node, NULL_TREE);
13353 tree void_ftype_pv2si_v2df
13354 = build_function_type_list (void_type_node,
13355 pv2si_type_node, V2DF_type_node, NULL_TREE);
13356 tree void_ftype_pdouble_v2df
13357 = build_function_type_list (void_type_node,
13358 pdouble_type_node, V2DF_type_node, NULL_TREE);
13359 tree void_ftype_pint_int
13360 = build_function_type_list (void_type_node,
13361 pint_type_node, integer_type_node, NULL_TREE);
13362 tree void_ftype_v16qi_v16qi_pchar
13363 = build_function_type_list (void_type_node,
13364 V16QI_type_node, V16QI_type_node,
13365 pchar_type_node, NULL_TREE);
13366 tree v2df_ftype_pcdouble
13367 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13368 tree v2df_ftype_v2df_v2df
13369 = build_function_type_list (V2DF_type_node,
13370 V2DF_type_node, V2DF_type_node, NULL_TREE);
13371 tree v16qi_ftype_v16qi_v16qi
13372 = build_function_type_list (V16QI_type_node,
13373 V16QI_type_node, V16QI_type_node, NULL_TREE);
13374 tree v8hi_ftype_v8hi_v8hi
13375 = build_function_type_list (V8HI_type_node,
13376 V8HI_type_node, V8HI_type_node, NULL_TREE);
13377 tree v4si_ftype_v4si_v4si
13378 = build_function_type_list (V4SI_type_node,
13379 V4SI_type_node, V4SI_type_node, NULL_TREE);
13380 tree v2di_ftype_v2di_v2di
13381 = build_function_type_list (V2DI_type_node,
13382 V2DI_type_node, V2DI_type_node, NULL_TREE);
13383 tree v2di_ftype_v2df_v2df
13384 = build_function_type_list (V2DI_type_node,
13385 V2DF_type_node, V2DF_type_node, NULL_TREE);
13386 tree v2df_ftype_v2df
13387 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13388 tree v2df_ftype_double
13389 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13390 tree v2df_ftype_double_double
13391 = build_function_type_list (V2DF_type_node,
13392 double_type_node, double_type_node, NULL_TREE);
13393 tree int_ftype_v8hi_int
13394 = build_function_type_list (integer_type_node,
13395 V8HI_type_node, integer_type_node, NULL_TREE);
13396 tree v8hi_ftype_v8hi_int_int
13397 = build_function_type_list (V8HI_type_node,
13398 V8HI_type_node, integer_type_node,
13399 integer_type_node, NULL_TREE);
13400 tree v2di_ftype_v2di_int
13401 = build_function_type_list (V2DI_type_node,
13402 V2DI_type_node, integer_type_node, NULL_TREE);
13403 tree v4si_ftype_v4si_int
13404 = build_function_type_list (V4SI_type_node,
13405 V4SI_type_node, integer_type_node, NULL_TREE);
13406 tree v8hi_ftype_v8hi_int
13407 = build_function_type_list (V8HI_type_node,
13408 V8HI_type_node, integer_type_node, NULL_TREE);
13409 tree v8hi_ftype_v8hi_v2di
13410 = build_function_type_list (V8HI_type_node,
13411 V8HI_type_node, V2DI_type_node, NULL_TREE);
13412 tree v4si_ftype_v4si_v2di
13413 = build_function_type_list (V4SI_type_node,
13414 V4SI_type_node, V2DI_type_node, NULL_TREE);
13415 tree v4si_ftype_v8hi_v8hi
13416 = build_function_type_list (V4SI_type_node,
13417 V8HI_type_node, V8HI_type_node, NULL_TREE);
13418 tree di_ftype_v8qi_v8qi
13419 = build_function_type_list (long_long_unsigned_type_node,
13420 V8QI_type_node, V8QI_type_node, NULL_TREE);
13421 tree v2di_ftype_v16qi_v16qi
13422 = build_function_type_list (V2DI_type_node,
13423 V16QI_type_node, V16QI_type_node, NULL_TREE);
13424 tree int_ftype_v16qi
13425 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13426 tree v16qi_ftype_pcchar
13427 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13428 tree void_ftype_pchar_v16qi
13429 = build_function_type_list (void_type_node,
13430 pchar_type_node, V16QI_type_node, NULL_TREE);
13431 tree v4si_ftype_pcint
13432 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13433 tree void_ftype_pcint_v4si
13434 = build_function_type_list (void_type_node,
13435 pcint_type_node, V4SI_type_node, NULL_TREE);
13436 tree v2di_ftype_v2di
13437 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13439 /* Add all builtins that are more or less simple operations on two
13441 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13443 /* Use one of the operands; the target can have a different mode for
13444 mask-generating compares. */
13445 enum machine_mode mode;
13450 mode = insn_data[d->icode].operand[1].mode;
13455 type = v16qi_ftype_v16qi_v16qi;
13458 type = v8hi_ftype_v8hi_v8hi;
13461 type = v4si_ftype_v4si_v4si;
13464 type = v2di_ftype_v2di_v2di;
13467 type = v2df_ftype_v2df_v2df;
13470 type = ti_ftype_ti_ti;
13473 type = v4sf_ftype_v4sf_v4sf;
13476 type = v8qi_ftype_v8qi_v8qi;
13479 type = v4hi_ftype_v4hi_v4hi;
13482 type = v2si_ftype_v2si_v2si;
13485 type = di_ftype_di_di;
13492 /* Override for comparisons. */
13493 if (d->icode == CODE_FOR_maskcmpv4sf3
13494 || d->icode == CODE_FOR_maskncmpv4sf3
13495 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13496 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13497 type = v4si_ftype_v4sf_v4sf;
13499 if (d->icode == CODE_FOR_maskcmpv2df3
13500 || d->icode == CODE_FOR_maskncmpv2df3
13501 || d->icode == CODE_FOR_vmmaskcmpv2df3
13502 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13503 type = v2di_ftype_v2df_v2df;
13505 def_builtin (d->mask, d->name, type, d->code);
13508 /* Add the remaining MMX insns with somewhat more complicated types. */
13509 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13510 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13511 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13512 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13513 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13515 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13516 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13517 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13519 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13520 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13522 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13523 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13525 /* comi/ucomi insns. */
13526 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13527 if (d->mask == MASK_SSE2)
13528 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13530 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13532 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13533 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13534 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13536 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13537 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13538 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13539 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13540 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13541 def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13542 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13543 def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13544 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13545 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13546 def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13548 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13549 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13551 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13553 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13554 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13555 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13556 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13557 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13558 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13560 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13561 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13562 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13563 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13565 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13566 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13567 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13568 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13570 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13572 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13574 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13575 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13576 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13577 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13578 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13579 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13581 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13583 /* Original 3DNow! */
13584 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13585 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13586 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13587 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13588 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13589 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13590 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13591 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13592 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13593 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13594 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13595 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13596 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13597 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13598 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13599 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13600 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13601 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13602 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13603 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13605 /* 3DNow! extension as used in the Athlon CPU. */
13606 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13607 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13608 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13609 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13610 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13611 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13613 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13642 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13644 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13646 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13647 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13664 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13665 def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13672 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13679 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13696 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13703 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13704 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13706 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13709 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13714 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13716 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13725 /* Errors in the source file can cause expand_expr to return const0_rtx
13726 where we expect a vector. To avoid crashing, use one of the vector
13727 clear instructions. */
13729 safe_vector_operand (x, mode)
13731 enum machine_mode mode;
13733 if (x != const0_rtx)
13735 x = gen_reg_rtx (mode);
13737 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13738 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13739 : gen_rtx_SUBREG (DImode, x, 0)));
13741 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13742 : gen_rtx_SUBREG (V4SFmode, x, 0),
13743 CONST0_RTX (V4SFmode)));
13747 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13750 ix86_expand_binop_builtin (icode, arglist, target)
13751 enum insn_code icode;
13756 tree arg0 = TREE_VALUE (arglist);
13757 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13758 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13759 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13760 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13761 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13762 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13764 if (VECTOR_MODE_P (mode0))
13765 op0 = safe_vector_operand (op0, mode0);
13766 if (VECTOR_MODE_P (mode1))
13767 op1 = safe_vector_operand (op1, mode1);
13770 || GET_MODE (target) != tmode
13771 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13772 target = gen_reg_rtx (tmode);
13774 if (GET_MODE (op1) == SImode && mode1 == TImode)
13776 rtx x = gen_reg_rtx (V4SImode);
13777 emit_insn (gen_sse2_loadd (x, op1));
13778 op1 = gen_lowpart (TImode, x);
13781 /* In case the insn wants input operands in modes different from
13782 the result, abort. */
13783 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13786 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13787 op0 = copy_to_mode_reg (mode0, op0);
13788 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13789 op1 = copy_to_mode_reg (mode1, op1);
13791 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13792 yet one of the two must not be a memory. This is normally enforced
13793 by expanders, but we didn't bother to create one here. */
13794 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13795 op0 = copy_to_mode_reg (mode0, op0);
13797 pat = GEN_FCN (icode) (target, op0, op1);
13804 /* Subroutine of ix86_expand_builtin to take care of stores. */
13807 ix86_expand_store_builtin (icode, arglist)
13808 enum insn_code icode;
13812 tree arg0 = TREE_VALUE (arglist);
13813 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13814 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13815 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13816 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13817 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13819 if (VECTOR_MODE_P (mode1))
13820 op1 = safe_vector_operand (op1, mode1);
13822 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13823 op1 = copy_to_mode_reg (mode1, op1);
13825 pat = GEN_FCN (icode) (op0, op1);
13831 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13834 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13835 enum insn_code icode;
13841 tree arg0 = TREE_VALUE (arglist);
13842 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13843 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13844 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13847 || GET_MODE (target) != tmode
13848 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13849 target = gen_reg_rtx (tmode);
13851 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13854 if (VECTOR_MODE_P (mode0))
13855 op0 = safe_vector_operand (op0, mode0);
13857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13858 op0 = copy_to_mode_reg (mode0, op0);
13861 pat = GEN_FCN (icode) (target, op0);
13868 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13869 sqrtss, rsqrtss, rcpss. */
13872 ix86_expand_unop1_builtin (icode, arglist, target)
13873 enum insn_code icode;
13878 tree arg0 = TREE_VALUE (arglist);
13879 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13880 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13881 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13884 || GET_MODE (target) != tmode
13885 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13886 target = gen_reg_rtx (tmode);
13888 if (VECTOR_MODE_P (mode0))
13889 op0 = safe_vector_operand (op0, mode0);
13891 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13892 op0 = copy_to_mode_reg (mode0, op0);
13895 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13896 op1 = copy_to_mode_reg (mode0, op1);
13898 pat = GEN_FCN (icode) (target, op0, op1);
13905 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13908 ix86_expand_sse_compare (d, arglist, target)
13909 const struct builtin_description *d;
13914 tree arg0 = TREE_VALUE (arglist);
13915 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13916 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13917 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13919 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13920 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13921 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13922 enum rtx_code comparison = d->comparison;
13924 if (VECTOR_MODE_P (mode0))
13925 op0 = safe_vector_operand (op0, mode0);
13926 if (VECTOR_MODE_P (mode1))
13927 op1 = safe_vector_operand (op1, mode1);
13929 /* Swap operands if we have a comparison that isn't available in
13933 rtx tmp = gen_reg_rtx (mode1);
13934 emit_move_insn (tmp, op1);
13940 || GET_MODE (target) != tmode
13941 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13942 target = gen_reg_rtx (tmode);
13944 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13945 op0 = copy_to_mode_reg (mode0, op0);
13946 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13947 op1 = copy_to_mode_reg (mode1, op1);
13949 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13950 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13957 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13960 ix86_expand_sse_comi (d, arglist, target)
13961 const struct builtin_description *d;
13966 tree arg0 = TREE_VALUE (arglist);
13967 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13968 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13969 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13971 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13972 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13973 enum rtx_code comparison = d->comparison;
13975 if (VECTOR_MODE_P (mode0))
13976 op0 = safe_vector_operand (op0, mode0);
13977 if (VECTOR_MODE_P (mode1))
13978 op1 = safe_vector_operand (op1, mode1);
13980 /* Swap operands if we have a comparison that isn't available in
13989 target = gen_reg_rtx (SImode);
13990 emit_move_insn (target, const0_rtx);
13991 target = gen_rtx_SUBREG (QImode, target, 0);
13993 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13994 op0 = copy_to_mode_reg (mode0, op0);
13995 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13996 op1 = copy_to_mode_reg (mode1, op1);
13998 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13999 pat = GEN_FCN (d->icode) (op0, op1);
14003 emit_insn (gen_rtx_SET (VOIDmode,
14004 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14005 gen_rtx_fmt_ee (comparison, QImode,
14009 return SUBREG_REG (target);
14012 /* Expand an expression EXP that calls a built-in function,
14013 with result going to TARGET if that's convenient
14014 (and in mode MODE if that's convenient).
14015 SUBTARGET may be used as the target for computing one of EXP's operands.
14016 IGNORE is nonzero if the value is to be ignored. */
14019 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
14022 rtx subtarget ATTRIBUTE_UNUSED;
14023 enum machine_mode mode ATTRIBUTE_UNUSED;
14024 int ignore ATTRIBUTE_UNUSED;
14026 const struct builtin_description *d;
14028 enum insn_code icode;
14029 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14030 tree arglist = TREE_OPERAND (exp, 1);
14031 tree arg0, arg1, arg2;
14032 rtx op0, op1, op2, pat;
14033 enum machine_mode tmode, mode0, mode1, mode2;
14034 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14038 case IX86_BUILTIN_EMMS:
14039 emit_insn (gen_emms ());
14042 case IX86_BUILTIN_SFENCE:
14043 emit_insn (gen_sfence ());
14046 case IX86_BUILTIN_PEXTRW:
14047 case IX86_BUILTIN_PEXTRW128:
14048 icode = (fcode == IX86_BUILTIN_PEXTRW
14049 ? CODE_FOR_mmx_pextrw
14050 : CODE_FOR_sse2_pextrw);
14051 arg0 = TREE_VALUE (arglist);
14052 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14053 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14054 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14055 tmode = insn_data[icode].operand[0].mode;
14056 mode0 = insn_data[icode].operand[1].mode;
14057 mode1 = insn_data[icode].operand[2].mode;
14059 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14060 op0 = copy_to_mode_reg (mode0, op0);
14061 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14063 /* @@@ better error message */
14064 error ("selector must be an immediate");
14065 return gen_reg_rtx (tmode);
14068 || GET_MODE (target) != tmode
14069 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14070 target = gen_reg_rtx (tmode);
14071 pat = GEN_FCN (icode) (target, op0, op1);
14077 case IX86_BUILTIN_PINSRW:
14078 case IX86_BUILTIN_PINSRW128:
14079 icode = (fcode == IX86_BUILTIN_PINSRW
14080 ? CODE_FOR_mmx_pinsrw
14081 : CODE_FOR_sse2_pinsrw);
14082 arg0 = TREE_VALUE (arglist);
14083 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14084 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14085 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14086 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14087 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14088 tmode = insn_data[icode].operand[0].mode;
14089 mode0 = insn_data[icode].operand[1].mode;
14090 mode1 = insn_data[icode].operand[2].mode;
14091 mode2 = insn_data[icode].operand[3].mode;
14093 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14094 op0 = copy_to_mode_reg (mode0, op0);
14095 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14096 op1 = copy_to_mode_reg (mode1, op1);
14097 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14099 /* @@@ better error message */
14100 error ("selector must be an immediate");
14104 || GET_MODE (target) != tmode
14105 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14106 target = gen_reg_rtx (tmode);
14107 pat = GEN_FCN (icode) (target, op0, op1, op2);
14113 case IX86_BUILTIN_MASKMOVQ:
14114 case IX86_BUILTIN_MASKMOVDQU:
14115 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14116 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14117 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14118 : CODE_FOR_sse2_maskmovdqu));
14119 /* Note the arg order is different from the operand order. */
14120 arg1 = TREE_VALUE (arglist);
14121 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14122 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14123 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14124 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14125 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14126 mode0 = insn_data[icode].operand[0].mode;
14127 mode1 = insn_data[icode].operand[1].mode;
14128 mode2 = insn_data[icode].operand[2].mode;
14130 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14131 op0 = copy_to_mode_reg (mode0, op0);
14132 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14133 op1 = copy_to_mode_reg (mode1, op1);
14134 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14135 op2 = copy_to_mode_reg (mode2, op2);
14136 pat = GEN_FCN (icode) (op0, op1, op2);
14142 case IX86_BUILTIN_SQRTSS:
14143 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14144 case IX86_BUILTIN_RSQRTSS:
14145 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14146 case IX86_BUILTIN_RCPSS:
14147 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14149 case IX86_BUILTIN_LOADAPS:
14150 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14152 case IX86_BUILTIN_LOADUPS:
14153 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14155 case IX86_BUILTIN_STOREAPS:
14156 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14158 case IX86_BUILTIN_STOREUPS:
14159 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14161 case IX86_BUILTIN_LOADSS:
14162 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14164 case IX86_BUILTIN_STORESS:
14165 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14167 case IX86_BUILTIN_LOADHPS:
14168 case IX86_BUILTIN_LOADLPS:
14169 case IX86_BUILTIN_LOADHPD:
14170 case IX86_BUILTIN_LOADLPD:
14171 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14172 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14173 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14174 : CODE_FOR_sse2_movlpd);
14175 arg0 = TREE_VALUE (arglist);
14176 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14177 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14178 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14179 tmode = insn_data[icode].operand[0].mode;
14180 mode0 = insn_data[icode].operand[1].mode;
14181 mode1 = insn_data[icode].operand[2].mode;
14183 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14184 op0 = copy_to_mode_reg (mode0, op0);
14185 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14187 || GET_MODE (target) != tmode
14188 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14189 target = gen_reg_rtx (tmode);
14190 pat = GEN_FCN (icode) (target, op0, op1);
14196 case IX86_BUILTIN_STOREHPS:
14197 case IX86_BUILTIN_STORELPS:
14198 case IX86_BUILTIN_STOREHPD:
14199 case IX86_BUILTIN_STORELPD:
14200 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14201 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14202 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14203 : CODE_FOR_sse2_movlpd);
14204 arg0 = TREE_VALUE (arglist);
14205 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14206 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14207 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14208 mode0 = insn_data[icode].operand[1].mode;
14209 mode1 = insn_data[icode].operand[2].mode;
14211 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14212 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14213 op1 = copy_to_mode_reg (mode1, op1);
14215 pat = GEN_FCN (icode) (op0, op0, op1);
14221 case IX86_BUILTIN_MOVNTPS:
14222 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14223 case IX86_BUILTIN_MOVNTQ:
14224 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14226 case IX86_BUILTIN_LDMXCSR:
14227 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14228 target = assign_386_stack_local (SImode, 0);
14229 emit_move_insn (target, op0);
14230 emit_insn (gen_ldmxcsr (target));
14233 case IX86_BUILTIN_STMXCSR:
14234 target = assign_386_stack_local (SImode, 0);
14235 emit_insn (gen_stmxcsr (target));
14236 return copy_to_mode_reg (SImode, target);
14238 case IX86_BUILTIN_SHUFPS:
14239 case IX86_BUILTIN_SHUFPD:
14240 icode = (fcode == IX86_BUILTIN_SHUFPS
14241 ? CODE_FOR_sse_shufps
14242 : CODE_FOR_sse2_shufpd);
14243 arg0 = TREE_VALUE (arglist);
14244 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14245 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14246 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14247 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14248 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14249 tmode = insn_data[icode].operand[0].mode;
14250 mode0 = insn_data[icode].operand[1].mode;
14251 mode1 = insn_data[icode].operand[2].mode;
14252 mode2 = insn_data[icode].operand[3].mode;
14254 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14255 op0 = copy_to_mode_reg (mode0, op0);
14256 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14257 op1 = copy_to_mode_reg (mode1, op1);
14258 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14260 /* @@@ better error message */
14261 error ("mask must be an immediate");
14262 return gen_reg_rtx (tmode);
14265 || GET_MODE (target) != tmode
14266 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14267 target = gen_reg_rtx (tmode);
14268 pat = GEN_FCN (icode) (target, op0, op1, op2);
14274 case IX86_BUILTIN_PSHUFW:
14275 case IX86_BUILTIN_PSHUFD:
14276 case IX86_BUILTIN_PSHUFHW:
14277 case IX86_BUILTIN_PSHUFLW:
14278 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14279 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14280 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14281 : CODE_FOR_mmx_pshufw);
14282 arg0 = TREE_VALUE (arglist);
14283 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14284 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14285 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14286 tmode = insn_data[icode].operand[0].mode;
14287 mode1 = insn_data[icode].operand[1].mode;
14288 mode2 = insn_data[icode].operand[2].mode;
14290 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14291 op0 = copy_to_mode_reg (mode1, op0);
14292 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14294 /* @@@ better error message */
14295 error ("mask must be an immediate");
14299 || GET_MODE (target) != tmode
14300 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14301 target = gen_reg_rtx (tmode);
14302 pat = GEN_FCN (icode) (target, op0, op1);
14308 case IX86_BUILTIN_PSLLDQI128:
14309 case IX86_BUILTIN_PSRLDQI128:
14310 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14311 : CODE_FOR_sse2_lshrti3);
14312 arg0 = TREE_VALUE (arglist);
14313 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14314 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14315 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14316 tmode = insn_data[icode].operand[0].mode;
14317 mode1 = insn_data[icode].operand[1].mode;
14318 mode2 = insn_data[icode].operand[2].mode;
14320 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14322 op0 = copy_to_reg (op0);
14323 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14325 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14327 error ("shift must be an immediate");
14330 target = gen_reg_rtx (V2DImode);
14331 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14337 case IX86_BUILTIN_FEMMS:
14338 emit_insn (gen_femms ());
14341 case IX86_BUILTIN_PAVGUSB:
14342 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14344 case IX86_BUILTIN_PF2ID:
14345 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14347 case IX86_BUILTIN_PFACC:
14348 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14350 case IX86_BUILTIN_PFADD:
14351 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14353 case IX86_BUILTIN_PFCMPEQ:
14354 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14356 case IX86_BUILTIN_PFCMPGE:
14357 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14359 case IX86_BUILTIN_PFCMPGT:
14360 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14362 case IX86_BUILTIN_PFMAX:
14363 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14365 case IX86_BUILTIN_PFMIN:
14366 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14368 case IX86_BUILTIN_PFMUL:
14369 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14371 case IX86_BUILTIN_PFRCP:
14372 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14374 case IX86_BUILTIN_PFRCPIT1:
14375 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14377 case IX86_BUILTIN_PFRCPIT2:
14378 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14380 case IX86_BUILTIN_PFRSQIT1:
14381 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14383 case IX86_BUILTIN_PFRSQRT:
14384 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14386 case IX86_BUILTIN_PFSUB:
14387 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14389 case IX86_BUILTIN_PFSUBR:
14390 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14392 case IX86_BUILTIN_PI2FD:
14393 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14395 case IX86_BUILTIN_PMULHRW:
14396 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14398 case IX86_BUILTIN_PF2IW:
14399 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14401 case IX86_BUILTIN_PFNACC:
14402 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14404 case IX86_BUILTIN_PFPNACC:
14405 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14407 case IX86_BUILTIN_PI2FW:
14408 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14410 case IX86_BUILTIN_PSWAPDSI:
14411 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14413 case IX86_BUILTIN_PSWAPDSF:
14414 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14416 case IX86_BUILTIN_SSE_ZERO:
14417 target = gen_reg_rtx (V4SFmode);
14418 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14421 case IX86_BUILTIN_MMX_ZERO:
14422 target = gen_reg_rtx (DImode);
14423 emit_insn (gen_mmx_clrdi (target));
14426 case IX86_BUILTIN_CLRTI:
14427 target = gen_reg_rtx (V2DImode);
14428 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14432 case IX86_BUILTIN_SQRTSD:
14433 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14434 case IX86_BUILTIN_LOADAPD:
14435 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14436 case IX86_BUILTIN_LOADUPD:
14437 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14439 case IX86_BUILTIN_STOREAPD:
14440 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14441 case IX86_BUILTIN_STOREUPD:
14442 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14444 case IX86_BUILTIN_LOADSD:
14445 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14447 case IX86_BUILTIN_STORESD:
14448 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14450 case IX86_BUILTIN_SETPD1:
14451 target = assign_386_stack_local (DFmode, 0);
14452 arg0 = TREE_VALUE (arglist);
14453 emit_move_insn (adjust_address (target, DFmode, 0),
14454 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14455 op0 = gen_reg_rtx (V2DFmode);
14456 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14457 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14460 case IX86_BUILTIN_SETPD:
14461 target = assign_386_stack_local (V2DFmode, 0);
14462 arg0 = TREE_VALUE (arglist);
14463 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14464 emit_move_insn (adjust_address (target, DFmode, 0),
14465 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14466 emit_move_insn (adjust_address (target, DFmode, 8),
14467 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14468 op0 = gen_reg_rtx (V2DFmode);
14469 emit_insn (gen_sse2_movapd (op0, target));
14472 case IX86_BUILTIN_LOADRPD:
14473 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14474 gen_reg_rtx (V2DFmode), 1);
14475 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14478 case IX86_BUILTIN_LOADPD1:
14479 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14480 gen_reg_rtx (V2DFmode), 1);
14481 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14484 case IX86_BUILTIN_STOREPD1:
14485 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14486 case IX86_BUILTIN_STORERPD:
14487 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14489 case IX86_BUILTIN_CLRPD:
14490 target = gen_reg_rtx (V2DFmode);
14491 emit_insn (gen_sse_clrv2df (target));
14494 case IX86_BUILTIN_MFENCE:
14495 emit_insn (gen_sse2_mfence ());
14497 case IX86_BUILTIN_LFENCE:
14498 emit_insn (gen_sse2_lfence ());
14501 case IX86_BUILTIN_CLFLUSH:
14502 arg0 = TREE_VALUE (arglist);
14503 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14504 icode = CODE_FOR_sse2_clflush;
14505 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14506 op0 = copy_to_mode_reg (Pmode, op0);
14508 emit_insn (gen_sse2_clflush (op0));
14511 case IX86_BUILTIN_MOVNTPD:
14512 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14513 case IX86_BUILTIN_MOVNTDQ:
14514 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14515 case IX86_BUILTIN_MOVNTI:
14516 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14518 case IX86_BUILTIN_LOADDQA:
14519 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14520 case IX86_BUILTIN_LOADDQU:
14521 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14522 case IX86_BUILTIN_LOADD:
14523 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14525 case IX86_BUILTIN_STOREDQA:
14526 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14527 case IX86_BUILTIN_STOREDQU:
14528 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14529 case IX86_BUILTIN_STORED:
14530 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14536 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14537 if (d->code == fcode)
14539 /* Compares are treated specially. */
14540 if (d->icode == CODE_FOR_maskcmpv4sf3
14541 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14542 || d->icode == CODE_FOR_maskncmpv4sf3
14543 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14544 || d->icode == CODE_FOR_maskcmpv2df3
14545 || d->icode == CODE_FOR_vmmaskcmpv2df3
14546 || d->icode == CODE_FOR_maskncmpv2df3
14547 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14548 return ix86_expand_sse_compare (d, arglist, target);
14550 return ix86_expand_binop_builtin (d->icode, arglist, target);
14553 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14554 if (d->code == fcode)
14555 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14557 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14558 if (d->code == fcode)
14559 return ix86_expand_sse_comi (d, arglist, target);
14561 /* @@@ Should really do something sensible here. */
14565 /* Store OPERAND to the memory after reload is completed. This means
14566 that we can't easily use assign_stack_local. */
14568 ix86_force_to_memory (mode, operand)
14569 enum machine_mode mode;
14573 if (!reload_completed)
14575 if (TARGET_64BIT && TARGET_RED_ZONE)
14577 result = gen_rtx_MEM (mode,
14578 gen_rtx_PLUS (Pmode,
14580 GEN_INT (-RED_ZONE_SIZE)));
14581 emit_move_insn (result, operand);
14583 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14589 operand = gen_lowpart (DImode, operand);
14593 gen_rtx_SET (VOIDmode,
14594 gen_rtx_MEM (DImode,
14595 gen_rtx_PRE_DEC (DImode,
14596 stack_pointer_rtx)),
14602 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14611 split_di (&operand, 1, operands, operands + 1);
14613 gen_rtx_SET (VOIDmode,
14614 gen_rtx_MEM (SImode,
14615 gen_rtx_PRE_DEC (Pmode,
14616 stack_pointer_rtx)),
14619 gen_rtx_SET (VOIDmode,
14620 gen_rtx_MEM (SImode,
14621 gen_rtx_PRE_DEC (Pmode,
14622 stack_pointer_rtx)),
14627 /* It is better to store HImodes as SImodes. */
14628 if (!TARGET_PARTIAL_REG_STALL)
14629 operand = gen_lowpart (SImode, operand);
14633 gen_rtx_SET (VOIDmode,
14634 gen_rtx_MEM (GET_MODE (operand),
14635 gen_rtx_PRE_DEC (SImode,
14636 stack_pointer_rtx)),
14642 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14647 /* Free operand from the memory. */
14649 ix86_free_from_memory (mode)
14650 enum machine_mode mode;
14652 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14656 if (mode == DImode || TARGET_64BIT)
14658 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14662 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14663 to pop or add instruction if registers are available. */
14664 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14665 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14670 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14671 QImode must go into class Q_REGS.
14672 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14673 movdf to do mem-to-mem moves through integer regs. */
14675 ix86_preferred_reload_class (x, class)
14677 enum reg_class class;
14679 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14681 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14683 /* SSE can't load any constant directly yet. */
14684 if (SSE_CLASS_P (class))
14686 /* Floats can load 0 and 1. */
14687 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14689 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14690 if (MAYBE_SSE_CLASS_P (class))
14691 return (reg_class_subset_p (class, GENERAL_REGS)
14692 ? GENERAL_REGS : FLOAT_REGS);
14696 /* General regs can load everything. */
14697 if (reg_class_subset_p (class, GENERAL_REGS))
14698 return GENERAL_REGS;
14699 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14700 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14703 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14705 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14710 /* If we are copying between general and FP registers, we need a memory
14711 location. The same is true for SSE and MMX registers.
14713 The macro can't work reliably when one of the CLASSES is class containing
14714 registers from multiple units (SSE, MMX, integer). We avoid this by never
14715 combining those units in single alternative in the machine description.
14716 Ensure that this constraint holds to avoid unexpected surprises.
14718 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14719 enforce these sanity checks. */
14721 ix86_secondary_memory_needed (class1, class2, mode, strict)
14722 enum reg_class class1, class2;
14723 enum machine_mode mode;
14726 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14727 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14728 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14729 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14730 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14731 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14738 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14739 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14740 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14741 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14742 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14744 /* Return the cost of moving data from a register in class CLASS1 to
14745 one in class CLASS2.
14747 It is not required that the cost always equal 2 when FROM is the same as TO;
14748 on some machines it is expensive to move between registers if they are not
14749 general registers. */
14751 ix86_register_move_cost (mode, class1, class2)
14752 enum machine_mode mode;
14753 enum reg_class class1, class2;
14755 /* In case we require secondary memory, compute cost of the store followed
14756 by load. In order to avoid bad register allocation choices, we need
14757 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14759 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14763 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14764 MEMORY_MOVE_COST (mode, class1, 1));
14765 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14766 MEMORY_MOVE_COST (mode, class2, 1));
14768 /* In case of copying from general_purpose_register we may emit multiple
14769 stores followed by single load causing memory size mismatch stall.
14770 Count this as arbitrarily high cost of 20. */
14771 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14774 /* In the case of FP/MMX moves, the registers actually overlap, and we
14775 have to switch modes in order to treat them differently. */
14776 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14777 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14783 /* Moves between SSE/MMX and integer unit are expensive. */
14784 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14785 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14786 return ix86_cost->mmxsse_to_integer;
14787 if (MAYBE_FLOAT_CLASS_P (class1))
14788 return ix86_cost->fp_move;
14789 if (MAYBE_SSE_CLASS_P (class1))
14790 return ix86_cost->sse_move;
14791 if (MAYBE_MMX_CLASS_P (class1))
14792 return ix86_cost->mmx_move;
14796 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14798 ix86_hard_regno_mode_ok (regno, mode)
14800 enum machine_mode mode;
14802 /* Flags and only flags can only hold CCmode values. */
14803 if (CC_REGNO_P (regno))
14804 return GET_MODE_CLASS (mode) == MODE_CC;
14805 if (GET_MODE_CLASS (mode) == MODE_CC
14806 || GET_MODE_CLASS (mode) == MODE_RANDOM
14807 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14809 if (FP_REGNO_P (regno))
14810 return VALID_FP_MODE_P (mode);
14811 if (SSE_REGNO_P (regno))
14812 return VALID_SSE_REG_MODE (mode);
14813 if (MMX_REGNO_P (regno))
14814 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14815 /* We handle both integer and floats in the general purpose registers.
14816 In future we should be able to handle vector modes as well. */
14817 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14819 /* Take care for QImode values - they can be in non-QI regs, but then
14820 they do cause partial register stalls. */
14821 if (regno < 4 || mode != QImode || TARGET_64BIT)
14823 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14826 /* Return the cost of moving data of mode M between a
14827 register and memory. A value of 2 is the default; this cost is
14828 relative to those in `REGISTER_MOVE_COST'.
14830 If moving between registers and memory is more expensive than
14831 between two registers, you should define this macro to express the
14834 Model also increased moving costs of QImode registers in non
14838 ix86_memory_move_cost (mode, class, in)
14839 enum machine_mode mode;
14840 enum reg_class class;
14843 if (FLOAT_CLASS_P (class))
14861 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14863 if (SSE_CLASS_P (class))
14866 switch (GET_MODE_SIZE (mode))
14880 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14882 if (MMX_CLASS_P (class))
14885 switch (GET_MODE_SIZE (mode))
14896 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14898 switch (GET_MODE_SIZE (mode))
14902 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14903 : ix86_cost->movzbl_load);
14905 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14906 : ix86_cost->int_store[0] + 4);
14909 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14911 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14912 if (mode == TFmode)
14914 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14915 * ((int) GET_MODE_SIZE (mode)
14916 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14920 /* Compute a (partial) cost for rtx X. Return true if the complete
14921 cost has been computed, and false if subexpressions should be
14922 scanned. In either case, *TOTAL contains the cost result. */
14925 ix86_rtx_costs (x, code, outer_code, total)
14927 int code, outer_code;
14930 enum machine_mode mode = GET_MODE (x);
14938 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14940 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14942 else if (flag_pic && SYMBOLIC_CONST (x))
14949 if (mode == VOIDmode)
14952 switch (standard_80387_constant_p (x))
14957 default: /* Other constants */
14962 /* Start with (MEM (SYMBOL_REF)), since that's where
14963 it'll probably end up. Add a penalty for size. */
14964 *total = (COSTS_N_INSNS (1)
14966 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14972 /* The zero extensions is often completely free on x86_64, so make
14973 it as cheap as possible. */
14974 if (TARGET_64BIT && mode == DImode
14975 && GET_MODE (XEXP (x, 0)) == SImode)
14977 else if (TARGET_ZERO_EXTEND_WITH_AND)
14978 *total = COSTS_N_INSNS (ix86_cost->add);
14980 *total = COSTS_N_INSNS (ix86_cost->movzx);
14984 *total = COSTS_N_INSNS (ix86_cost->movsx);
14988 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14989 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14991 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14994 *total = COSTS_N_INSNS (ix86_cost->add);
14997 if ((value == 2 || value == 3)
14998 && !TARGET_DECOMPOSE_LEA
14999 && ix86_cost->lea <= ix86_cost->shift_const)
15001 *total = COSTS_N_INSNS (ix86_cost->lea);
15011 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15013 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15015 if (INTVAL (XEXP (x, 1)) > 32)
15016 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15018 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15022 if (GET_CODE (XEXP (x, 1)) == AND)
15023 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15025 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15030 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15031 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15033 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15038 if (FLOAT_MODE_P (mode))
15039 *total = COSTS_N_INSNS (ix86_cost->fmul);
15040 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15042 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15045 for (nbits = 0; value != 0; value >>= 1)
15048 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15049 + nbits * ix86_cost->mult_bit);
15053 /* This is arbitrary */
15054 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15055 + 7 * ix86_cost->mult_bit);
15063 if (FLOAT_MODE_P (mode))
15064 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15066 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15070 if (FLOAT_MODE_P (mode))
15071 *total = COSTS_N_INSNS (ix86_cost->fadd);
15072 else if (!TARGET_DECOMPOSE_LEA
15073 && GET_MODE_CLASS (mode) == MODE_INT
15074 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15076 if (GET_CODE (XEXP (x, 0)) == PLUS
15077 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15078 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15079 && CONSTANT_P (XEXP (x, 1)))
15081 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15082 if (val == 2 || val == 4 || val == 8)
15084 *total = COSTS_N_INSNS (ix86_cost->lea);
15085 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15086 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15088 *total += rtx_cost (XEXP (x, 1), outer_code);
15092 else if (GET_CODE (XEXP (x, 0)) == MULT
15093 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15095 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15096 if (val == 2 || val == 4 || val == 8)
15098 *total = COSTS_N_INSNS (ix86_cost->lea);
15099 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15100 *total += rtx_cost (XEXP (x, 1), outer_code);
15104 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15106 *total = COSTS_N_INSNS (ix86_cost->lea);
15107 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15108 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15109 *total += rtx_cost (XEXP (x, 1), outer_code);
15116 if (FLOAT_MODE_P (mode))
15118 *total = COSTS_N_INSNS (ix86_cost->fadd);
15126 if (!TARGET_64BIT && mode == DImode)
15128 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15129 + (rtx_cost (XEXP (x, 0), outer_code)
15130 << (GET_MODE (XEXP (x, 0)) != DImode))
15131 + (rtx_cost (XEXP (x, 1), outer_code)
15132 << (GET_MODE (XEXP (x, 1)) != DImode)));
15138 if (FLOAT_MODE_P (mode))
15140 *total = COSTS_N_INSNS (ix86_cost->fchs);
15146 if (!TARGET_64BIT && mode == DImode)
15147 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15149 *total = COSTS_N_INSNS (ix86_cost->add);
15153 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15158 if (FLOAT_MODE_P (mode))
15159 *total = COSTS_N_INSNS (ix86_cost->fabs);
15163 if (FLOAT_MODE_P (mode))
15164 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15172 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15174 ix86_svr3_asm_out_constructor (symbol, priority)
15176 int priority ATTRIBUTE_UNUSED;
15179 fputs ("\tpushl $", asm_out_file);
15180 assemble_name (asm_out_file, XSTR (symbol, 0));
15181 fputc ('\n', asm_out_file);
15187 static int current_machopic_label_num;
15189 /* Given a symbol name and its associated stub, write out the
15190 definition of the stub. */
15193 machopic_output_stub (file, symb, stub)
15195 const char *symb, *stub;
15197 unsigned int length;
15198 char *binder_name, *symbol_name, lazy_ptr_name[32];
15199 int label = ++current_machopic_label_num;
15201 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15202 symb = (*targetm.strip_name_encoding) (symb);
15204 length = strlen (stub);
15205 binder_name = alloca (length + 32);
15206 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15208 length = strlen (symb);
15209 symbol_name = alloca (length + 32);
15210 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15212 sprintf (lazy_ptr_name, "L%d$lz", label);
15215 machopic_picsymbol_stub_section ();
15217 machopic_symbol_stub_section ();
15219 fprintf (file, "%s:\n", stub);
15220 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15224 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15225 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15226 fprintf (file, "\tjmp %%edx\n");
15229 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15231 fprintf (file, "%s:\n", binder_name);
15235 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15236 fprintf (file, "\tpushl %%eax\n");
15239 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15241 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15243 machopic_lazy_symbol_ptr_section ();
15244 fprintf (file, "%s:\n", lazy_ptr_name);
15245 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15246 fprintf (file, "\t.long %s\n", binder_name);
15248 #endif /* TARGET_MACHO */
15250 /* Order the registers for register allocator. */
15253 x86_order_regs_for_local_alloc ()
15258 /* First allocate the local general purpose registers. */
15259 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15260 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15261 reg_alloc_order [pos++] = i;
15263 /* Global general purpose registers. */
15264 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15265 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15266 reg_alloc_order [pos++] = i;
15268 /* x87 registers come first in case we are doing FP math
15270 if (!TARGET_SSE_MATH)
15271 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15272 reg_alloc_order [pos++] = i;
15274 /* SSE registers. */
15275 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15276 reg_alloc_order [pos++] = i;
15277 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15278 reg_alloc_order [pos++] = i;
15280 /* x87 registers. */
15281 if (TARGET_SSE_MATH)
15282 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15283 reg_alloc_order [pos++] = i;
15285 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15286 reg_alloc_order [pos++] = i;
15288 /* Initialize the rest of array as we do not allocate some registers
15290 while (pos < FIRST_PSEUDO_REGISTER)
15291 reg_alloc_order [pos++] = 0;
15294 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15295 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15298 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15299 struct attribute_spec.handler. */
15301 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
15304 tree args ATTRIBUTE_UNUSED;
15305 int flags ATTRIBUTE_UNUSED;
15306 bool *no_add_attrs;
15309 if (DECL_P (*node))
15311 if (TREE_CODE (*node) == TYPE_DECL)
15312 type = &TREE_TYPE (*node);
15317 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15318 || TREE_CODE (*type) == UNION_TYPE)))
15320 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15321 *no_add_attrs = true;
15324 else if ((is_attribute_p ("ms_struct", name)
15325 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15326 || ((is_attribute_p ("gcc_struct", name)
15327 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15329 warning ("`%s' incompatible attribute ignored",
15330 IDENTIFIER_POINTER (name));
15331 *no_add_attrs = true;
15338 ix86_ms_bitfield_layout_p (record_type)
15341 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15342 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15343 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15346 /* Returns an expression indicating where the this parameter is
15347 located on entry to the FUNCTION. */
15350 x86_this_parameter (function)
15353 tree type = TREE_TYPE (function);
15357 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15358 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15361 if (ix86_fntype_regparm (type) > 0)
15365 parm = TYPE_ARG_TYPES (type);
15366 /* Figure out whether or not the function has a variable number of
15368 for (; parm; parm = TREE_CHAIN (parm))
15369 if (TREE_VALUE (parm) == void_type_node)
15371 /* If not, the this parameter is in %eax. */
15373 return gen_rtx_REG (SImode, 0);
15376 if (aggregate_value_p (TREE_TYPE (type)))
15377 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15379 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15382 /* Determine whether x86_output_mi_thunk can succeed. */
15385 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15386 tree thunk ATTRIBUTE_UNUSED;
15387 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15388 HOST_WIDE_INT vcall_offset;
15391 /* 64-bit can handle anything. */
15395 /* For 32-bit, everything's fine if we have one free register. */
15396 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15399 /* Need a free register for vcall_offset. */
15403 /* Need a free register for GOT references. */
15404 if (flag_pic && !(*targetm.binds_local_p) (function))
15407 /* Otherwise ok. */
15411 /* Output the assembler code for a thunk function. THUNK_DECL is the
15412 declaration for the thunk function itself, FUNCTION is the decl for
15413 the target function. DELTA is an immediate constant offset to be
15414 added to THIS. If VCALL_OFFSET is nonzero, the word at
15415 *(*this + vcall_offset) should be added to THIS. */
15418 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15419 FILE *file ATTRIBUTE_UNUSED;
15420 tree thunk ATTRIBUTE_UNUSED;
15421 HOST_WIDE_INT delta;
15422 HOST_WIDE_INT vcall_offset;
15426 rtx this = x86_this_parameter (function);
15429 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15430 pull it in now and let DELTA benefit. */
15433 else if (vcall_offset)
15435 /* Put the this parameter into %eax. */
15437 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15438 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15441 this_reg = NULL_RTX;
15443 /* Adjust the this parameter by a fixed constant. */
15446 xops[0] = GEN_INT (delta);
15447 xops[1] = this_reg ? this_reg : this;
15450 if (!x86_64_general_operand (xops[0], DImode))
15452 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15454 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15458 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15461 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15464 /* Adjust the this parameter by a value stored in the vtable. */
15468 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15470 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15472 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15475 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15477 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15479 /* Adjust the this parameter. */
15480 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15481 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15483 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15484 xops[0] = GEN_INT (vcall_offset);
15486 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15487 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15489 xops[1] = this_reg;
15491 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15493 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15496 /* If necessary, drop THIS back to its stack slot. */
15497 if (this_reg && this_reg != this)
15499 xops[0] = this_reg;
15501 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15504 xops[0] = DECL_RTL (function);
15507 if (!flag_pic || (*targetm.binds_local_p) (function))
15508 output_asm_insn ("jmp\t%P0", xops);
15511 tmp = XEXP (xops[0], 0);
15512 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15513 tmp = gen_rtx_CONST (Pmode, tmp);
15514 tmp = gen_rtx_MEM (QImode, tmp);
15516 output_asm_insn ("jmp\t%A0", xops);
15521 if (!flag_pic || (*targetm.binds_local_p) (function))
15522 output_asm_insn ("jmp\t%P0", xops);
15527 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15528 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15529 tmp = gen_rtx_MEM (QImode, tmp);
15531 output_asm_insn ("jmp\t%0", xops);
15534 #endif /* TARGET_MACHO */
15536 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15537 output_set_got (tmp);
15540 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15541 output_asm_insn ("jmp\t{*}%1", xops);
15547 x86_field_alignment (field, computed)
15551 enum machine_mode mode;
15552 tree type = TREE_TYPE (field);
15554 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15556 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15557 ? get_inner_array_type (type) : type);
15558 if (mode == DFmode || mode == DCmode
15559 || GET_MODE_CLASS (mode) == MODE_INT
15560 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15561 return MIN (32, computed);
15565 /* Output assembler code to FILE to increment profiler label # LABELNO
15566 for profiling a function entry. */
15568 x86_function_profiler (file, labelno)
15570 int labelno ATTRIBUTE_UNUSED;
15575 #ifndef NO_PROFILE_COUNTERS
15576 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15578 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15582 #ifndef NO_PROFILE_COUNTERS
15583 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15585 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15589 #ifndef NO_PROFILE_COUNTERS
15590 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15591 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15593 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15597 #ifndef NO_PROFILE_COUNTERS
15598 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15599 PROFILE_COUNT_REGISTER);
15601 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15605 /* Implement machine specific optimizations.
15606 At the moment we implement single transformation: AMD Athlon works faster
15607 when RET is not destination of conditional jump or directly preceded
15608 by other jump instruction. We avoid the penalty by inserting NOP just
15609 before the RET instructions in such cases. */
15611 x86_machine_dependent_reorg (first)
15612 rtx first ATTRIBUTE_UNUSED;
15616 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15618 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15620 basic_block bb = e->src;
15623 bool insert = false;
15625 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
15627 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15628 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15630 if (prev && GET_CODE (prev) == CODE_LABEL)
15633 for (e = bb->pred; e; e = e->pred_next)
15634 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15635 && !(e->flags & EDGE_FALLTHRU))
15640 prev = prev_active_insn (ret);
15641 if (prev && GET_CODE (prev) == JUMP_INSN
15642 && any_condjump_p (prev))
15644 /* Empty functions get branch misspredict even when the jump destination
15645 is not visible to us. */
15646 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15650 emit_insn_before (gen_nop (), ret);
15654 /* Return nonzero when QImode register that must be represented via REX prefix
15657 x86_extended_QIreg_mentioned_p (insn)
15661 extract_insn_cached (insn);
15662 for (i = 0; i < recog_data.n_operands; i++)
15663 if (REG_P (recog_data.operand[i])
15664 && REGNO (recog_data.operand[i]) >= 4)
15669 /* Return nonzero when P points to register encoded via REX prefix.
15670 Called via for_each_rtx. */
15672 extended_reg_mentioned_1 (p, data)
15674 void *data ATTRIBUTE_UNUSED;
15676 unsigned int regno;
15679 regno = REGNO (*p);
15680 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15683 /* Return true when INSN mentions register that must be encoded using REX
15686 x86_extended_reg_mentioned_p (insn)
15689 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15692 /* Generate an unsigned DImode to FP conversion. This is the same code
15693 optabs would emit if we didn't have TFmode patterns. */
15696 x86_emit_floatuns (operands)
15699 rtx neglab, donelab, i0, i1, f0, in, out;
15700 enum machine_mode mode;
15703 in = force_reg (DImode, operands[1]);
15704 mode = GET_MODE (out);
15705 neglab = gen_label_rtx ();
15706 donelab = gen_label_rtx ();
15707 i1 = gen_reg_rtx (Pmode);
15708 f0 = gen_reg_rtx (mode);
15710 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15712 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15713 emit_jump_insn (gen_jump (donelab));
15716 emit_label (neglab);
15718 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15719 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15720 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15721 expand_float (f0, i0, 0);
15722 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15724 emit_label (donelab);
15727 /* Return if we do not know how to pass TYPE solely in registers. */
15729 ix86_must_pass_in_stack (mode, type)
15730 enum machine_mode mode;
15733 if (default_must_pass_in_stack (mode, type))
15735 return (!TARGET_64BIT && type && mode == TImode);
15738 #include "gt-i386.h"