1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Processor costs (relative to an add) */
/* Cost table used when tuning for size: entries are small, near-uniform
   values so no instruction form is strongly preferred.  */
55 struct processor_costs size_cost = { /* costs for tuning for size */
56 2, /* cost of an add instruction */
57 3, /* cost of a lea instruction */
58 2, /* variable shift costs */
59 3, /* constant shift costs */
60 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
61 0, /* cost of multiply per each bit set */
62 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
63 3, /* cost of movsx */
64 3, /* cost of movzx */
67 2, /* cost for loading QImode using movzbl */
68 {2, 2, 2}, /* cost of loading integer registers
69 in QImode, HImode and SImode.
70 Relative to reg-reg move (2). */
71 {2, 2, 2}, /* cost of storing integer registers */
72 2, /* cost of reg,reg fld/fst */
73 {2, 2, 2}, /* cost of loading fp registers
74 in SFmode, DFmode and XFmode */
75 {2, 2, 2}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
76 3, /* cost of moving MMX register */
77 {3, 3}, /* cost of loading MMX registers
78 in SImode and DImode */
79 {3, 3}, /* cost of storing MMX registers
80 in SImode and DImode */
81 3, /* cost of moving SSE register */
82 {3, 3, 3}, /* cost of loading SSE registers
83 in SImode, DImode and TImode */
84 {3, 3, 3}, /* cost of storing SSE registers
85 in SImode, DImode and TImode */
86 3, /* MMX or SSE register to integer */
87 0, /* size of prefetch block */
88 0, /* number of parallel prefetches */
90 2, /* cost of FADD and FSUB insns. */
91 2, /* cost of FMUL instruction. */
92 2, /* cost of FDIV instruction. */
93 2, /* cost of FABS instruction. */
94 2, /* cost of FCHS instruction. */
95 2, /* cost of FSQRT instruction. */
98 /* Processor costs (relative to an add) */
100 struct processor_costs i386_cost = { /* 386 specific costs */
101 1, /* cost of an add instruction */
102 1, /* cost of a lea instruction */
103 3, /* variable shift costs */
104 2, /* constant shift costs */
105 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
106 1, /* cost of multiply per each bit set */
107 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
108 3, /* cost of movsx */
109 2, /* cost of movzx */
110 15, /* "large" insn */
112 4, /* cost for loading QImode using movzbl */
113 {2, 4, 2}, /* cost of loading integer registers
114 in QImode, HImode and SImode.
115 Relative to reg-reg move (2). */
116 {2, 4, 2}, /* cost of storing integer registers */
117 2, /* cost of reg,reg fld/fst */
118 {8, 8, 8}, /* cost of loading fp registers
119 in SFmode, DFmode and XFmode */
120 {8, 8, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
121 2, /* cost of moving MMX register */
122 {4, 8}, /* cost of loading MMX registers
123 in SImode and DImode */
124 {4, 8}, /* cost of storing MMX registers
125 in SImode and DImode */
126 2, /* cost of moving SSE register */
127 {4, 8, 16}, /* cost of loading SSE registers
128 in SImode, DImode and TImode */
129 {4, 8, 16}, /* cost of storing SSE registers
130 in SImode, DImode and TImode */
131 3, /* MMX or SSE register to integer */
132 0, /* size of prefetch block */
133 0, /* number of parallel prefetches */
135 23, /* cost of FADD and FSUB insns. */
136 27, /* cost of FMUL instruction. */
137 88, /* cost of FDIV instruction. */
138 22, /* cost of FABS instruction. */
139 24, /* cost of FCHS instruction. */
140 122, /* cost of FSQRT instruction. */
144 struct processor_costs i486_cost = { /* 486 specific costs */
145 1, /* cost of an add instruction */
146 1, /* cost of a lea instruction */
147 3, /* variable shift costs */
148 2, /* constant shift costs */
149 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
150 1, /* cost of multiply per each bit set */
151 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
152 3, /* cost of movsx */
153 2, /* cost of movzx */
154 15, /* "large" insn */
156 4, /* cost for loading QImode using movzbl */
157 {2, 4, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 4, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {8, 8, 8}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {8, 8, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
165 2, /* cost of moving MMX register */
166 {4, 8}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {4, 8}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {4, 8, 16}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {4, 8, 16}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3, /* MMX or SSE register to integer */
176 0, /* size of prefetch block */
177 0, /* number of parallel prefetches */
179 8, /* cost of FADD and FSUB insns. */
180 16, /* cost of FMUL instruction. */
181 73, /* cost of FDIV instruction. */
182 3, /* cost of FABS instruction. */
183 3, /* cost of FCHS instruction. */
184 83, /* cost of FSQRT instruction. */
188 struct processor_costs pentium_cost = { /* Pentium specific costs */
189 1, /* cost of an add instruction */
190 1, /* cost of a lea instruction */
191 4, /* variable shift costs */
192 1, /* constant shift costs */
193 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
194 0, /* cost of multiply per each bit set */
195 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
196 3, /* cost of movsx */
197 2, /* cost of movzx */
198 8, /* "large" insn */
200 6, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {2, 2, 6}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
209 8, /* cost of moving MMX register */
210 {8, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {8, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
223 3, /* cost of FADD and FSUB insns. */
224 3, /* cost of FMUL instruction. */
225 39, /* cost of FDIV instruction. */
226 1, /* cost of FABS instruction. */
227 1, /* cost of FCHS instruction. */
228 70, /* cost of FSQRT instruction. */
232 struct processor_costs pentiumpro_cost = { /* PentiumPro specific costs */
233 1, /* cost of an add instruction */
234 1, /* cost of a lea instruction */
235 1, /* variable shift costs */
236 1, /* constant shift costs */
237 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
238 0, /* cost of multiply per each bit set */
239 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
240 1, /* cost of movsx */
241 1, /* cost of movzx */
242 8, /* "large" insn */
244 2, /* cost for loading QImode using movzbl */
245 {4, 4, 4}, /* cost of loading integer registers
246 in QImode, HImode and SImode.
247 Relative to reg-reg move (2). */
248 {2, 2, 2}, /* cost of storing integer registers */
249 2, /* cost of reg,reg fld/fst */
250 {2, 2, 6}, /* cost of loading fp registers
251 in SFmode, DFmode and XFmode */
252 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
253 2, /* cost of moving MMX register */
254 {2, 2}, /* cost of loading MMX registers
255 in SImode and DImode */
256 {2, 2}, /* cost of storing MMX registers
257 in SImode and DImode */
258 2, /* cost of moving SSE register */
259 {2, 2, 8}, /* cost of loading SSE registers
260 in SImode, DImode and TImode */
261 {2, 2, 8}, /* cost of storing SSE registers
262 in SImode, DImode and TImode */
263 3, /* MMX or SSE register to integer */
264 32, /* size of prefetch block */
265 6, /* number of parallel prefetches */
267 3, /* cost of FADD and FSUB insns. */
268 5, /* cost of FMUL instruction. */
269 56, /* cost of FDIV instruction. */
270 2, /* cost of FABS instruction. */
271 2, /* cost of FCHS instruction. */
272 56, /* cost of FSQRT instruction. */
276 struct processor_costs k6_cost = { /* AMD K6 specific costs */
277 1, /* cost of an add instruction */
278 2, /* cost of a lea instruction */
279 1, /* variable shift costs */
280 1, /* constant shift costs */
281 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
282 0, /* cost of multiply per each bit set */
283 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
284 2, /* cost of movsx */
285 2, /* cost of movzx */
286 8, /* "large" insn */
288 3, /* cost for loading QImode using movzbl */
289 {4, 5, 4}, /* cost of loading integer registers
290 in QImode, HImode and SImode.
291 Relative to reg-reg move (2). */
292 {2, 3, 2}, /* cost of storing integer registers */
293 4, /* cost of reg,reg fld/fst */
294 {6, 6, 6}, /* cost of loading fp registers
295 in SFmode, DFmode and XFmode */
296 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
297 2, /* cost of moving MMX register */
298 {2, 2}, /* cost of loading MMX registers
299 in SImode and DImode */
300 {2, 2}, /* cost of storing MMX registers
301 in SImode and DImode */
302 2, /* cost of moving SSE register */
303 {2, 2, 8}, /* cost of loading SSE registers
304 in SImode, DImode and TImode */
305 {2, 2, 8}, /* cost of storing SSE registers
306 in SImode, DImode and TImode */
307 6, /* MMX or SSE register to integer */
308 32, /* size of prefetch block */
309 1, /* number of parallel prefetches */
311 2, /* cost of FADD and FSUB insns. */
312 2, /* cost of FMUL instruction. */
313 56, /* cost of FDIV instruction. */
314 2, /* cost of FABS instruction. */
315 2, /* cost of FCHS instruction. */
316 56, /* cost of FSQRT instruction. */
320 struct processor_costs athlon_cost = { /* AMD Athlon specific costs */
321 1, /* cost of an add instruction */
322 2, /* cost of a lea instruction */
323 1, /* variable shift costs */
324 1, /* constant shift costs */
325 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
326 0, /* cost of multiply per each bit set */
327 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
328 1, /* cost of movsx */
329 1, /* cost of movzx */
330 8, /* "large" insn */
332 4, /* cost for loading QImode using movzbl */
333 {3, 4, 3}, /* cost of loading integer registers
334 in QImode, HImode and SImode.
335 Relative to reg-reg move (2). */
336 {3, 4, 3}, /* cost of storing integer registers */
337 4, /* cost of reg,reg fld/fst */
338 {4, 4, 12}, /* cost of loading fp registers
339 in SFmode, DFmode and XFmode */
340 {6, 6, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
341 2, /* cost of moving MMX register */
342 {4, 4}, /* cost of loading MMX registers
343 in SImode and DImode */
344 {4, 4}, /* cost of storing MMX registers
345 in SImode and DImode */
346 2, /* cost of moving SSE register */
347 {4, 4, 6}, /* cost of loading SSE registers
348 in SImode, DImode and TImode */
349 {4, 4, 5}, /* cost of storing SSE registers
350 in SImode, DImode and TImode */
351 5, /* MMX or SSE register to integer */
352 64, /* size of prefetch block */
353 6, /* number of parallel prefetches */
355 4, /* cost of FADD and FSUB insns. */
356 4, /* cost of FMUL instruction. */
357 24, /* cost of FDIV instruction. */
358 2, /* cost of FABS instruction. */
359 2, /* cost of FCHS instruction. */
360 35, /* cost of FSQRT instruction. */
364 struct processor_costs k8_cost = { /* AMD K8 specific costs */
365 1, /* cost of an add instruction */
366 2, /* cost of a lea instruction */
367 1, /* variable shift costs */
368 1, /* constant shift costs */
369 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
370 0, /* cost of multiply per each bit set */
371 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
372 1, /* cost of movsx */
373 1, /* cost of movzx */
374 8, /* "large" insn */
376 4, /* cost for loading QImode using movzbl */
377 {3, 4, 3}, /* cost of loading integer registers
378 in QImode, HImode and SImode.
379 Relative to reg-reg move (2). */
380 {3, 4, 3}, /* cost of storing integer registers */
381 4, /* cost of reg,reg fld/fst */
382 {4, 4, 12}, /* cost of loading fp registers
383 in SFmode, DFmode and XFmode */
384 {6, 6, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
385 2, /* cost of moving MMX register */
386 {3, 3}, /* cost of loading MMX registers
387 in SImode and DImode */
388 {4, 4}, /* cost of storing MMX registers
389 in SImode and DImode */
390 2, /* cost of moving SSE register */
391 {4, 3, 6}, /* cost of loading SSE registers
392 in SImode, DImode and TImode */
393 {4, 4, 5}, /* cost of storing SSE registers
394 in SImode, DImode and TImode */
395 5, /* MMX or SSE register to integer */
396 64, /* size of prefetch block */
397 6, /* number of parallel prefetches */
399 4, /* cost of FADD and FSUB insns. */
400 4, /* cost of FMUL instruction. */
401 19, /* cost of FDIV instruction. */
402 2, /* cost of FABS instruction. */
403 2, /* cost of FCHS instruction. */
404 35, /* cost of FSQRT instruction. */
408 struct processor_costs pentium4_cost = { /* Pentium 4 specific costs */
409 1, /* cost of an add instruction */
410 1, /* cost of a lea instruction */
411 4, /* variable shift costs */
412 4, /* constant shift costs */
413 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
414 0, /* cost of multiply per each bit set */
415 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
416 1, /* cost of movsx */
417 1, /* cost of movzx */
418 16, /* "large" insn */
420 2, /* cost for loading QImode using movzbl */
421 {4, 5, 4}, /* cost of loading integer registers
422 in QImode, HImode and SImode.
423 Relative to reg-reg move (2). */
424 {2, 3, 2}, /* cost of storing integer registers */
425 2, /* cost of reg,reg fld/fst */
426 {2, 2, 6}, /* cost of loading fp registers
427 in SFmode, DFmode and XFmode */
428 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
429 2, /* cost of moving MMX register */
430 {2, 2}, /* cost of loading MMX registers
431 in SImode and DImode */
432 {2, 2}, /* cost of storing MMX registers
433 in SImode and DImode */
434 12, /* cost of moving SSE register */
435 {12, 12, 12}, /* cost of loading SSE registers
436 in SImode, DImode and TImode */
437 {2, 2, 8}, /* cost of storing SSE registers
438 in SImode, DImode and TImode */
439 10, /* MMX or SSE register to integer */
440 64, /* size of prefetch block */
441 6, /* number of parallel prefetches */
443 5, /* cost of FADD and FSUB insns. */
444 7, /* cost of FMUL instruction. */
445 43, /* cost of FDIV instruction. */
446 2, /* cost of FABS instruction. */
447 2, /* cost of FCHS instruction. */
448 43, /* cost of FSQRT instruction. */
/* The cost table currently in effect.  Initialized to pentium_cost;
   presumably re-pointed at one of the tables above during option
   processing — TODO confirm, not visible in this chunk.  */
451 const struct processor_costs *ix86_cost = &pentium_cost;
453 /* Processor feature/optimization bitmasks. */
454 #define m_386 (1<<PROCESSOR_I386)
455 #define m_486 (1<<PROCESSOR_I486)
456 #define m_PENT (1<<PROCESSOR_PENTIUM)
457 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
458 #define m_K6 (1<<PROCESSOR_K6)
459 #define m_ATHLON (1<<PROCESSOR_ATHLON)
460 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
461 #define m_K8 (1<<PROCESSOR_K8)
462 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
/* Each x86_* tunable below is a bitmask over processor types: bit
   (1 << PROCESSOR_xxx) set means the feature/optimization applies when
   tuning for that processor.  A complemented (~) value selects every
   processor except those listed; ~(0) selects all, 0 selects none.  */
464 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
465 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
466 const int x86_zero_extend_with_and = m_486 | m_PENT;
467 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
468 const int x86_double_with_add = ~m_386;
469 const int x86_use_bit_test = m_386;
470 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
471 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
472 const int x86_3dnow_a = m_ATHLON_K8;
473 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_branch_hints = m_PENT4;
475 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
476 const int x86_partial_reg_stall = m_PPRO;
477 const int x86_use_loop = m_K6;
478 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
479 const int x86_use_mov0 = m_K6;
480 const int x86_use_cltd = ~(m_PENT | m_K6);
481 const int x86_read_modify_write = ~m_PENT;
482 const int x86_read_modify = ~(m_PENT | m_PPRO);
483 const int x86_split_long_moves = m_PPRO;
484 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
485 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
486 const int x86_single_stringop = m_386 | m_PENT4;
487 const int x86_qimode_math = ~(0);
488 const int x86_promote_qi_regs = 0;
489 const int x86_himode_math = ~(m_PPRO);
490 const int x86_promote_hi_regs = m_PPRO;
491 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
492 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
493 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
494 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
495 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
496 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
497 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
498 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
499 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
500 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
501 const int x86_decompose_lea = m_PENT4;
502 const int x86_shift1 = ~m_486;
503 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
504 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
505 /* Set for machines where the type and dependencies are resolved on SSE register
506 parts instead of whole registers, so we may maintain just lower part of
507 scalar values in proper format leaving the upper part undefined. */
508 const int x86_sse_partial_regs = m_ATHLON_K8;
509 /* Athlon optimizes partial-register FPS special case, thus avoiding the
510 need for extra instructions beforehand */
511 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
512 const int x86_sse_typeless_stores = m_ATHLON_K8;
513 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
514 const int x86_use_ffreep = m_ATHLON_K8;
515 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
517 /* In case the average insn count for single function invocation is
518 lower than this constant, emit fast (but longer) prologue and
   epilogue.  */
520 #define FAST_PROLOGUE_INSN_COUNT 20
522 /* Set by prologue expander and used by epilogue expander to determine
   the prologue/epilogue style to use.  */
524 static int use_fast_prologue_epilogue;
526 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
527 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
528 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
529 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
531 /* Array of the smallest class containing reg number REGNO, indexed by
532 REGNO. Used by REGNO_REG_CLASS in i386.h. */
534 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
537 AREG, DREG, CREG, BREG,
539 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
541 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
542 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
545 /* flags, fpsr, dirflag, frame */
546 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
547 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
549 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
551 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
552 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
553 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
557 /* The "default" register map used in 32bit mode. */
559 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
561 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
562 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
563 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
564 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
565 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
566 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
567 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
570 static int const x86_64_int_parameter_registers[6] =
572 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
573 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC register numbers used for 64-bit integer return values.
   NB: gcc regno 1 is %rdx, not %rdi — see the DWARF-numbering comment
   below ("2 for %edx (gcc regno = 1)") and the parameter-register table
   above, which labels 1 as RDX.  The old /*RDI*/ label on entry 1 was a
   copy-paste error.  */
576 static int const x86_64_int_return_registers[4] =
578 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
581 /* The "default" register map used in 64bit mode. */
582 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
584 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
585 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
586 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
587 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
588 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
589 8,9,10,11,12,13,14,15, /* extended integer registers */
590 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
593 /* Define the register numbers to be used in Dwarf debugging information.
594 The SVR4 reference port C compiler uses the following register numbers
595 in its Dwarf output code:
596 0 for %eax (gcc regno = 0)
597 1 for %ecx (gcc regno = 2)
598 2 for %edx (gcc regno = 1)
599 3 for %ebx (gcc regno = 3)
600 4 for %esp (gcc regno = 7)
601 5 for %ebp (gcc regno = 6)
602 6 for %esi (gcc regno = 4)
603 7 for %edi (gcc regno = 5)
604 The following three DWARF register numbers are never generated by
605 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
606 believes these numbers have these meanings.
607 8 for %eip (no gcc equivalent)
608 9 for %eflags (gcc regno = 17)
609 10 for %trapno (no gcc equivalent)
610 It is not at all clear how we should number the FP stack registers
611 for the x86 architecture. If the version of SDB on x86/svr4 were
612 a bit less brain dead with respect to floating-point then we would
613 have a precedent to follow with respect to DWARF register numbers
614 for x86 FP registers, but the SDB on x86/svr4 is so completely
615 broken with respect to FP registers that it is hardly worth thinking
616 of it as something to strive for compatibility with.
617 The version of x86/svr4 SDB I have at the moment does (partially)
618 seem to believe that DWARF register number 11 is associated with
619 the x86 register %st(0), but that's about all. Higher DWARF
620 register numbers don't seem to be associated with anything in
621 particular, and even for DWARF regno 11, SDB only seems to under-
622 stand that it should say that a variable lives in %st(0) (when
623 asked via an `=' command) if we said it was in DWARF regno 11,
624 but SDB still prints garbage when asked for the value of the
625 variable in question (via a `/' command).
626 (Also note that the labels SDB prints for various FP stack regs
627 when doing an `x' command are all wrong.)
628 Note that these problems generally don't affect the native SVR4
629 C compiler because it doesn't allow the use of -O with -g and
630 because when it is *not* optimizing, it allocates a memory
631 location for each floating-point variable, and the memory
632 location is what gets described in the DWARF AT_location
633 attribute for the variable in question.
634 Regardless of the severe mental illness of the x86/svr4 SDB, we
635 do something sensible here and we use the following DWARF
636 register numbers. Note that these are all stack-top-relative
638 11 for %st(0) (gcc regno = 8)
639 12 for %st(1) (gcc regno = 9)
640 13 for %st(2) (gcc regno = 10)
641 14 for %st(3) (gcc regno = 11)
642 15 for %st(4) (gcc regno = 12)
643 16 for %st(5) (gcc regno = 13)
644 17 for %st(6) (gcc regno = 14)
645 18 for %st(7) (gcc regno = 15)  */
647 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
649 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
650 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
651 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
652 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
653 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
654 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
655 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
658 /* Test and compare insns in i386.md store the information needed to
659 generate branch and scc insns here. */
661 rtx ix86_compare_op0 = NULL_RTX;
662 rtx ix86_compare_op1 = NULL_RTX;
664 /* The encoding characters for the four TLS models present in ELF. */
666 static char const tls_model_chars[] = " GLil";
668 #define MAX_386_STACK_LOCALS 3
669 /* Size of the register save area. */
670 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
672 /* Define the structure for the machine field in struct function. */
673 struct machine_function GTY(())
675 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
676 const char *some_ld_name;
677 int save_varrargs_registers;
678 int accesses_prev_frame;
681 #define ix86_stack_locals (cfun->machine->stack_locals)
682 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
684 /* Structure describing stack frame layout.
685 Stack grows downward:
691 saved frame pointer if frame_pointer_needed
692 <- HARD_FRAME_POINTER
698 > to_allocate <- FRAME_POINTER  */
710 int outgoing_arguments_size;
713 HOST_WIDE_INT to_allocate;
714 /* The offsets relative to ARG_POINTER. */
715 HOST_WIDE_INT frame_pointer_offset;
716 HOST_WIDE_INT hard_frame_pointer_offset;
717 HOST_WIDE_INT stack_pointer_offset;
720 /* Used to enable/disable debugging features. */
721 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
722 /* Code model option as passed by user. */
723 const char *ix86_cmodel_string;
725 enum cmodel ix86_cmodel;
727 const char *ix86_asm_string;
728 enum asm_dialect ix86_asm_dialect = ASM_ATT;
730 const char *ix86_tls_dialect_string;
731 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
733 /* Which unit we are generating floating point math for. */
734 enum fpmath_unit ix86_fpmath;
736 /* Which cpu are we scheduling for. */
737 enum processor_type ix86_cpu;
738 /* Which instruction set architecture to use. */
739 enum processor_type ix86_arch;
741 /* Strings to hold which cpu and instruction set architecture to use. */
742 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
743 const char *ix86_arch_string; /* for -march=<xxx> */
744 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
746 /* # of registers to use to pass arguments. */
747 const char *ix86_regparm_string;
749 /* true if sse prefetch instruction is not NOOP. */
750 int x86_prefetch_sse;
752 /* ix86_regparm_string as a number */
755 /* Alignment to use for loops and jumps: */
757 /* Power of two alignment for loops. */
758 const char *ix86_align_loops_string;
760 /* Power of two alignment for non-loop jumps. */
761 const char *ix86_align_jumps_string;
763 /* Power of two alignment for stack boundary in bytes. */
764 const char *ix86_preferred_stack_boundary_string;
766 /* Preferred alignment for stack boundary in bits. */
767 int ix86_preferred_stack_boundary;
769 /* Values 1-5: see jump.c */
770 int ix86_branch_cost;
771 const char *ix86_branch_cost_string;
773 /* Power of two alignment for functions. */
774 const char *ix86_align_funcs_string;
776 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
777 static char internal_label_prefix[16];
778 static int internal_label_prefix_len;
780 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
781 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
782 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
783 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
785 static const char *get_some_local_dynamic_name PARAMS ((void));
786 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
787 static rtx maybe_get_pool_constant PARAMS ((rtx));
788 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
789 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
791 static rtx get_thread_pointer PARAMS ((void));
792 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
793 static rtx gen_push PARAMS ((rtx));
794 static int memory_address_length PARAMS ((rtx addr));
795 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
796 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
797 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
798 static void ix86_dump_ppro_packet PARAMS ((FILE *));
799 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
800 static struct machine_function * ix86_init_machine_status PARAMS ((void));
801 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
802 static int ix86_nsaved_regs PARAMS ((void));
803 static void ix86_emit_save_regs PARAMS ((void));
804 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
805 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
806 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
807 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
808 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
809 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
810 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
811 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
812 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
813 static int ix86_issue_rate PARAMS ((void));
814 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
815 static void ix86_sched_init PARAMS ((FILE *, int, int));
816 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
817 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
818 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
819 static int ia32_multipass_dfa_lookahead PARAMS ((void));
820 static void ix86_init_mmx_sse_builtins PARAMS ((void));
821 static rtx x86_this_parameter PARAMS ((tree));
822 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
823 HOST_WIDE_INT, tree));
824 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
825 HOST_WIDE_INT, tree));
826 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
830 rtx base, index, disp;
834 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
835 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
837 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
838 static const char *ix86_strip_name_encoding PARAMS ((const char *))
841 struct builtin_description;
842 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
844 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
846 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
847 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
848 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
849 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
850 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
851 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
852 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
856 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
858 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
859 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
860 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
861 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
862 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
863 static int ix86_save_reg PARAMS ((unsigned int, int));
864 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
865 static int ix86_comp_type_attributes PARAMS ((tree, tree));
866 static int ix86_fntype_regparm PARAMS ((tree));
867 const struct attribute_spec ix86_attribute_table[];
868 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
869 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
870 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
871 static int ix86_value_regno PARAMS ((enum machine_mode));
872 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
873 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
875 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
876 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
879 /* Register class used for passing given 64bit part of the argument.
880 These represent classes as documented by the PS ABI, with the exception
881 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
882 use SF or DFmode move instead of DImode to avoid reformatting penalties.
884 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
885 whenever possible (upper half does contain padding).
887 enum x86_64_reg_class
890 X86_64_INTEGER_CLASS,
891 X86_64_INTEGERSI_CLASS,
900 static const char * const x86_64_reg_class_name[] =
901 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
903 #define MAX_CLASSES 4
904 static int classify_argument PARAMS ((enum machine_mode, tree,
905 enum x86_64_reg_class [MAX_CLASSES],
907 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
909 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
911 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
912 enum x86_64_reg_class));
914 /* Initialize the GCC target structure. */
915 #undef TARGET_ATTRIBUTE_TABLE
916 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
917 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
918 # undef TARGET_MERGE_DECL_ATTRIBUTES
919 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
922 #undef TARGET_COMP_TYPE_ATTRIBUTES
923 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
925 #undef TARGET_INIT_BUILTINS
926 #define TARGET_INIT_BUILTINS ix86_init_builtins
928 #undef TARGET_EXPAND_BUILTIN
929 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
931 #undef TARGET_ASM_FUNCTION_EPILOGUE
932 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
934 #undef TARGET_ASM_OPEN_PAREN
935 #define TARGET_ASM_OPEN_PAREN ""
936 #undef TARGET_ASM_CLOSE_PAREN
937 #define TARGET_ASM_CLOSE_PAREN ""
939 #undef TARGET_ASM_ALIGNED_HI_OP
940 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
941 #undef TARGET_ASM_ALIGNED_SI_OP
942 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
944 #undef TARGET_ASM_ALIGNED_DI_OP
945 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
948 #undef TARGET_ASM_UNALIGNED_HI_OP
949 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
950 #undef TARGET_ASM_UNALIGNED_SI_OP
951 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
952 #undef TARGET_ASM_UNALIGNED_DI_OP
953 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
955 #undef TARGET_SCHED_ADJUST_COST
956 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
957 #undef TARGET_SCHED_ISSUE_RATE
958 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
959 #undef TARGET_SCHED_VARIABLE_ISSUE
960 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
961 #undef TARGET_SCHED_INIT
962 #define TARGET_SCHED_INIT ix86_sched_init
963 #undef TARGET_SCHED_REORDER
964 #define TARGET_SCHED_REORDER ix86_sched_reorder
965 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
966 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
967 ia32_use_dfa_pipeline_interface
968 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
969 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
970 ia32_multipass_dfa_lookahead
972 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
973 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
976 #undef TARGET_HAVE_TLS
977 #define TARGET_HAVE_TLS true
979 #undef TARGET_CANNOT_FORCE_CONST_MEM
980 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
982 #undef TARGET_MS_BITFIELD_LAYOUT_P
983 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
985 #undef TARGET_ASM_OUTPUT_MI_THUNK
986 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
987 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
988 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
990 struct gcc_target targetm = TARGET_INITIALIZER;
992 /* Sometimes certain combinations of command options do not make
993 sense on a particular target machine. You can define a macro
994 `OVERRIDE_OPTIONS' to take account of this. This macro, if
995 defined, is executed once just after all the command options have
998 Don't use this macro to turn on various extra optimizations for
999 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1005 /* Comes from final.c -- no real reason to change it. */
1006 #define MAX_CODE_ALIGN 16
1010 const struct processor_costs *cost; /* Processor costs */
1011 const int target_enable; /* Target flags to enable. */
1012 const int target_disable; /* Target flags to disable. */
1013 const int align_loop; /* Default alignments. */
1014 const int align_loop_max_skip;
1015 const int align_jump;
1016 const int align_jump_max_skip;
1017 const int align_func;
1019 const processor_target_table[PROCESSOR_max] =
1021 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1022 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1023 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1024 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1025 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1026 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1027 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1028 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1031 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1034 const char *const name; /* processor name or nickname. */
1035 const enum processor_type processor;
1036 const enum pta_flags
1041 PTA_PREFETCH_SSE = 8,
1047 const processor_alias_table[] =
1049 {"i386", PROCESSOR_I386, 0},
1050 {"i486", PROCESSOR_I486, 0},
1051 {"i586", PROCESSOR_PENTIUM, 0},
1052 {"pentium", PROCESSOR_PENTIUM, 0},
1053 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1054 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1055 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1056 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1057 {"i686", PROCESSOR_PENTIUMPRO, 0},
1058 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1059 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1060 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1061 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1062 PTA_MMX | PTA_PREFETCH_SSE},
1063 {"k6", PROCESSOR_K6, PTA_MMX},
1064 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1065 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1066 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1068 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1069 | PTA_3DNOW | PTA_3DNOW_A},
1070 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1071 | PTA_3DNOW_A | PTA_SSE},
1072 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1073 | PTA_3DNOW_A | PTA_SSE},
1074 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1075 | PTA_3DNOW_A | PTA_SSE},
1076 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1077 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1080 int const pta_size = ARRAY_SIZE (processor_alias_table);
1082 /* By default our XFmode is the 80-bit extended format. If we have
1083 use TFmode instead, it's also the 80-bit format, but with padding. */
1084 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1085 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1087 /* Set the default values for switches whose default depends on TARGET_64BIT
1088 in case they weren't overwritten by command line options. */
1091 if (flag_omit_frame_pointer == 2)
1092 flag_omit_frame_pointer = 1;
1093 if (flag_asynchronous_unwind_tables == 2)
1094 flag_asynchronous_unwind_tables = 1;
1095 if (flag_pcc_struct_return == 2)
1096 flag_pcc_struct_return = 0;
1100 if (flag_omit_frame_pointer == 2)
1101 flag_omit_frame_pointer = 0;
1102 if (flag_asynchronous_unwind_tables == 2)
1103 flag_asynchronous_unwind_tables = 0;
1104 if (flag_pcc_struct_return == 2)
1105 flag_pcc_struct_return = 1;
1108 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1109 SUBTARGET_OVERRIDE_OPTIONS;
1112 if (!ix86_cpu_string && ix86_arch_string)
1113 ix86_cpu_string = ix86_arch_string;
1114 if (!ix86_cpu_string)
1115 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1116 if (!ix86_arch_string)
1117 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1119 if (ix86_cmodel_string != 0)
1121 if (!strcmp (ix86_cmodel_string, "small"))
1122 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1124 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1125 else if (!strcmp (ix86_cmodel_string, "32"))
1126 ix86_cmodel = CM_32;
1127 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1128 ix86_cmodel = CM_KERNEL;
1129 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1130 ix86_cmodel = CM_MEDIUM;
1131 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1132 ix86_cmodel = CM_LARGE;
1134 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1138 ix86_cmodel = CM_32;
1140 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1142 if (ix86_asm_string != 0)
1144 if (!strcmp (ix86_asm_string, "intel"))
1145 ix86_asm_dialect = ASM_INTEL;
1146 else if (!strcmp (ix86_asm_string, "att"))
1147 ix86_asm_dialect = ASM_ATT;
1149 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1151 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1152 error ("code model `%s' not supported in the %s bit mode",
1153 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1154 if (ix86_cmodel == CM_LARGE)
1155 sorry ("code model `large' not supported yet");
1156 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1157 sorry ("%i-bit mode not compiled in",
1158 (target_flags & MASK_64BIT) ? 64 : 32);
1160 for (i = 0; i < pta_size; i++)
1161 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1163 ix86_arch = processor_alias_table[i].processor;
1164 /* Default cpu tuning to the architecture. */
1165 ix86_cpu = ix86_arch;
1166 if (processor_alias_table[i].flags & PTA_MMX
1167 && !(target_flags_explicit & MASK_MMX))
1168 target_flags |= MASK_MMX;
1169 if (processor_alias_table[i].flags & PTA_3DNOW
1170 && !(target_flags_explicit & MASK_3DNOW))
1171 target_flags |= MASK_3DNOW;
1172 if (processor_alias_table[i].flags & PTA_3DNOW_A
1173 && !(target_flags_explicit & MASK_3DNOW_A))
1174 target_flags |= MASK_3DNOW_A;
1175 if (processor_alias_table[i].flags & PTA_SSE
1176 && !(target_flags_explicit & MASK_SSE))
1177 target_flags |= MASK_SSE;
1178 if (processor_alias_table[i].flags & PTA_SSE2
1179 && !(target_flags_explicit & MASK_SSE2))
1180 target_flags |= MASK_SSE2;
1181 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1182 x86_prefetch_sse = true;
1183 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1184 error ("CPU you selected does not support x86-64 instruction set");
1189 error ("bad value (%s) for -march= switch", ix86_arch_string);
1191 for (i = 0; i < pta_size; i++)
1192 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1194 ix86_cpu = processor_alias_table[i].processor;
1195 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1196 error ("CPU you selected does not support x86-64 instruction set");
1199 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1200 x86_prefetch_sse = true;
1202 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1205 ix86_cost = &size_cost;
1207 ix86_cost = processor_target_table[ix86_cpu].cost;
1208 target_flags |= processor_target_table[ix86_cpu].target_enable;
1209 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1211 /* Arrange to set up i386_stack_locals for all functions. */
1212 init_machine_status = ix86_init_machine_status;
1214 /* Validate -mregparm= value. */
1215 if (ix86_regparm_string)
1217 i = atoi (ix86_regparm_string);
1218 if (i < 0 || i > REGPARM_MAX)
1219 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1225 ix86_regparm = REGPARM_MAX;
1227 /* If the user has provided any of the -malign-* options,
1228 warn and use that value only if -falign-* is not set.
1229 Remove this code in GCC 3.2 or later. */
1230 if (ix86_align_loops_string)
1232 warning ("-malign-loops is obsolete, use -falign-loops");
1233 if (align_loops == 0)
1235 i = atoi (ix86_align_loops_string);
1236 if (i < 0 || i > MAX_CODE_ALIGN)
1237 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1239 align_loops = 1 << i;
1243 if (ix86_align_jumps_string)
1245 warning ("-malign-jumps is obsolete, use -falign-jumps");
1246 if (align_jumps == 0)
1248 i = atoi (ix86_align_jumps_string);
1249 if (i < 0 || i > MAX_CODE_ALIGN)
1250 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1252 align_jumps = 1 << i;
1256 if (ix86_align_funcs_string)
1258 warning ("-malign-functions is obsolete, use -falign-functions");
1259 if (align_functions == 0)
1261 i = atoi (ix86_align_funcs_string);
1262 if (i < 0 || i > MAX_CODE_ALIGN)
1263 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1265 align_functions = 1 << i;
1269 /* Default align_* from the processor table. */
1270 if (align_loops == 0)
1272 align_loops = processor_target_table[ix86_cpu].align_loop;
1273 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1275 if (align_jumps == 0)
1277 align_jumps = processor_target_table[ix86_cpu].align_jump;
1278 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1280 if (align_functions == 0)
1282 align_functions = processor_target_table[ix86_cpu].align_func;
1285 /* Validate -mpreferred-stack-boundary= value, or provide default.
1286 The default of 128 bits is for Pentium III's SSE __m128, but we
1287 don't want additional code to keep the stack aligned when
1288 optimizing for code size. */
1289 ix86_preferred_stack_boundary = (optimize_size
1290 ? TARGET_64BIT ? 128 : 32
1292 if (ix86_preferred_stack_boundary_string)
1294 i = atoi (ix86_preferred_stack_boundary_string);
1295 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1296 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1297 TARGET_64BIT ? 4 : 2);
1299 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1302 /* Validate -mbranch-cost= value, or provide default. */
1303 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1304 if (ix86_branch_cost_string)
1306 i = atoi (ix86_branch_cost_string);
1308 error ("-mbranch-cost=%d is not between 0 and 5", i);
1310 ix86_branch_cost = i;
1313 if (ix86_tls_dialect_string)
1315 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1316 ix86_tls_dialect = TLS_DIALECT_GNU;
1317 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1318 ix86_tls_dialect = TLS_DIALECT_SUN;
1320 error ("bad value (%s) for -mtls-dialect= switch",
1321 ix86_tls_dialect_string);
1324 /* Keep nonleaf frame pointers. */
1325 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1326 flag_omit_frame_pointer = 1;
1328 /* If we're doing fast math, we don't care about comparison order
1329 wrt NaNs. This lets us use a shorter comparison sequence. */
1330 if (flag_unsafe_math_optimizations)
1331 target_flags &= ~MASK_IEEE_FP;
1333 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1334 since the insns won't need emulation. */
1335 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1336 target_flags &= ~MASK_NO_FANCY_MATH_387;
1340 if (TARGET_ALIGN_DOUBLE)
1341 error ("-malign-double makes no sense in the 64bit mode");
1343 error ("-mrtd calling convention not supported in the 64bit mode");
1344 /* Enable by default the SSE and MMX builtins. */
1345 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1346 ix86_fpmath = FPMATH_SSE;
1349 ix86_fpmath = FPMATH_387;
1351 if (ix86_fpmath_string != 0)
1353 if (! strcmp (ix86_fpmath_string, "387"))
1354 ix86_fpmath = FPMATH_387;
1355 else if (! strcmp (ix86_fpmath_string, "sse"))
1359 warning ("SSE instruction set disabled, using 387 arithmetics");
1360 ix86_fpmath = FPMATH_387;
1363 ix86_fpmath = FPMATH_SSE;
1365 else if (! strcmp (ix86_fpmath_string, "387,sse")
1366 || ! strcmp (ix86_fpmath_string, "sse,387"))
1370 warning ("SSE instruction set disabled, using 387 arithmetics");
1371 ix86_fpmath = FPMATH_387;
1373 else if (!TARGET_80387)
1375 warning ("387 instruction set disabled, using SSE arithmetics");
1376 ix86_fpmath = FPMATH_SSE;
1379 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1382 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1385 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1389 target_flags |= MASK_MMX;
1390 x86_prefetch_sse = true;
1393 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1396 target_flags |= MASK_MMX;
1397 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1398 extensions it adds. */
1399 if (x86_3dnow_a & (1 << ix86_arch))
1400 target_flags |= MASK_3DNOW_A;
1402 if ((x86_accumulate_outgoing_args & CPUMASK)
1403 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1405 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1407 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1410 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1411 p = strchr (internal_label_prefix, 'X');
1412 internal_label_prefix_len = p - internal_label_prefix;
1418 optimization_options (level, size)
1420 int size ATTRIBUTE_UNUSED;
1422 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1423 make the problem with not enough registers even worse. */
1424 #ifdef INSN_SCHEDULING
1426 flag_schedule_insns = 0;
1429 /* The default values of these switches depend on the TARGET_64BIT
1430 that is not known at this moment. Mark these values with 2 and
1431 let user the to override these. In case there is no command line option
1432 specifying them, we will set the defaults in override_options. */
1434 flag_omit_frame_pointer = 2;
1435 flag_pcc_struct_return = 2;
1436 flag_asynchronous_unwind_tables = 2;
1439 /* Table of valid machine attributes. */
1440 const struct attribute_spec ix86_attribute_table[] =
1442 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1443 /* Stdcall attribute says callee is responsible for popping arguments
1444 if they are not variable. */
1445 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1446 /* Fastcall attribute says callee is responsible for popping arguments
1447 if they are not variable. */
1448 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1449 /* Cdecl attribute says the callee is a normal C declaration */
1450 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1451 /* Regparm attribute specifies how many integer arguments are to be
1452 passed in registers. */
1453 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1454 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1455 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1456 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1457 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1459 { NULL, 0, 0, false, false, false, NULL }
1462 /* If PIC, we cannot make sibling calls to global functions
1463 because the PLT requires %ebx live.
1464 If we are returning floats on the register stack, we cannot make
1465 sibling calls to functions that return floats. (The stack adjust
1466 instruction will wind up after the sibcall jump, and not be executed.) */
1469 ix86_function_ok_for_sibcall (decl, exp)
1473 /* If we are generating position-independent code, we cannot sibcall
1474 optimize any indirect call, or a direct call to a global function,
1475 as the PLT requires %ebx be live. */
1476 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1479 /* If we are returning floats on the 80387 register stack, we cannot
1480 make a sibcall from a function that doesn't return a float to a
1481 function that does; the necessary stack adjustment will not be
1483 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1484 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1487 /* If this call is indirect, we'll need to be able to use a call-clobbered
1488 register for the address of the target function. Make sure that all
1489 such registers are not used for passing parameters. */
1490 if (!decl && !TARGET_64BIT)
1492 int regparm = ix86_regparm;
1495 /* We're looking at the CALL_EXPR, we need the type of the function. */
1496 type = TREE_OPERAND (exp, 0); /* pointer expression */
1497 type = TREE_TYPE (type); /* pointer type */
1498 type = TREE_TYPE (type); /* function type */
1500 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1502 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1506 /* ??? Need to count the actual number of registers to be used,
1507 not the possible number of registers. Fix later. */
1512 /* Otherwise okay. That also includes certain types of indirect calls. */
1516 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1517 arguments as in struct attribute_spec.handler. */
1519 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1522 tree args ATTRIBUTE_UNUSED;
1523 int flags ATTRIBUTE_UNUSED;
1526 if (TREE_CODE (*node) != FUNCTION_TYPE
1527 && TREE_CODE (*node) != METHOD_TYPE
1528 && TREE_CODE (*node) != FIELD_DECL
1529 && TREE_CODE (*node) != TYPE_DECL)
1531 warning ("`%s' attribute only applies to functions",
1532 IDENTIFIER_POINTER (name));
1533 *no_add_attrs = true;
1537 if (is_attribute_p ("fastcall", name))
1539 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1541 error ("fastcall and stdcall attributes are not compatible");
1543 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1545 error ("fastcall and regparm attributes are not compatible");
1548 else if (is_attribute_p ("stdcall", name))
1550 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1552 error ("fastcall and stdcall attributes are not compatible");
1559 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1560 *no_add_attrs = true;
1566 /* Handle a "regparm" attribute;
1567 arguments as in struct attribute_spec.handler. */
1569 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1573 int flags ATTRIBUTE_UNUSED;
1576 if (TREE_CODE (*node) != FUNCTION_TYPE
1577 && TREE_CODE (*node) != METHOD_TYPE
1578 && TREE_CODE (*node) != FIELD_DECL
1579 && TREE_CODE (*node) != TYPE_DECL)
1581 warning ("`%s' attribute only applies to functions",
1582 IDENTIFIER_POINTER (name));
1583 *no_add_attrs = true;
1589 cst = TREE_VALUE (args);
1590 if (TREE_CODE (cst) != INTEGER_CST)
1592 warning ("`%s' attribute requires an integer constant argument",
1593 IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1596 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1598 warning ("argument to `%s' attribute larger than %d",
1599 IDENTIFIER_POINTER (name), REGPARM_MAX);
1600 *no_add_attrs = true;
1603 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1605 error ("fastcall and regparm attributes are not compatible");
1612 /* Return 0 if the attributes for two types are incompatible, 1 if they
1613 are compatible, and 2 if they are nearly compatible (which causes a
1614 warning to be generated). */
1617 ix86_comp_type_attributes (type1, type2)
1621 /* Check for mismatch of non-default calling convention. */
1622 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1624 if (TREE_CODE (type1) != FUNCTION_TYPE)
1627 /* Check for mismatched fastcall types */
1628 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1629 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1632 /* Check for mismatched return types (cdecl vs stdcall). */
1633 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1634 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1639 /* Return the regparm value for a fuctio with the indicated TYPE. */
1642 ix86_fntype_regparm (type)
1647 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1649 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1651 return ix86_regparm;
1654 /* Value is the number of bytes of arguments automatically
1655 popped when returning from a subroutine call.
1656 FUNDECL is the declaration node of the function (as a tree),
1657 FUNTYPE is the data type of the function (as a tree),
1658 or for a library call it is an identifier node for the subroutine name.
1659 SIZE is the number of bytes of arguments passed on the stack.
1661 On the 80386, the RTD insn may be used to pop them if the number
1662 of args is fixed, but if the number is variable then the caller
1663 must pop them all. RTD can't be used for library calls now
1664 because the library is compiled with the Unix compiler.
1665 Use of RTD is a selectable option, since it is incompatible with
1666 standard Unix calling sequences. If the option is not selected,
1667 the caller must always pop the args.
1669 The attribute stdcall is equivalent to RTD on a per module basis. */
1672 ix86_return_pops_args (fundecl, funtype, size)
1677 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1679 /* Cdecl functions override -mrtd, and never pop the stack. */
1680 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1682 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1683 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1684 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1688 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1689 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1690 == void_type_node)))
1694 /* Lose any fake structure return argument if it is passed on the stack. */
1695 if (aggregate_value_p (TREE_TYPE (funtype))
1698 int nregs = ix86_fntype_regparm (funtype);
1701 return GET_MODE_SIZE (Pmode);
1707 /* Argument support functions. */
1709 /* Return true when register may be used to pass function parameters. */
1711 ix86_function_arg_regno_p (regno)
1716 return (regno < REGPARM_MAX
1717 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1718 if (SSE_REGNO_P (regno) && TARGET_SSE)
1720 /* RAX is used as hidden argument to va_arg functions. */
1723 for (i = 0; i < REGPARM_MAX; i++)
1724 if (regno == x86_64_int_parameter_registers[i])
1729 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1730 for a call to a function whose data type is FNTYPE.
1731 For a library call, FNTYPE is 0. */
1734 init_cumulative_args (cum, fntype, libname)
1735 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1736 tree fntype; /* tree ptr for function decl */
1737 rtx libname; /* SYMBOL_REF of library name or 0 */
1739 static CUMULATIVE_ARGS zero_cum;
1740 tree param, next_param;
1742 if (TARGET_DEBUG_ARG)
1744 fprintf (stderr, "\ninit_cumulative_args (");
1746 fprintf (stderr, "fntype code = %s, ret code = %s",
1747 tree_code_name[(int) TREE_CODE (fntype)],
1748 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1750 fprintf (stderr, "no fntype");
1753 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1758 /* Set up the number of registers to use for passing arguments. */
1759 cum->nregs = ix86_regparm;
1760 cum->sse_nregs = SSE_REGPARM_MAX;
1761 if (fntype && !TARGET_64BIT)
1763 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1766 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1768 cum->maybe_vaarg = false;
1770 /* Use ecx and edx registers if function has fastcall attribute */
1771 if (fntype && !TARGET_64BIT)
1773 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1781 /* Determine if this function has variable arguments. This is
1782 indicated by the last argument being 'void_type_mode' if there
1783 are no variable arguments. If there are variable arguments, then
1784 we won't pass anything in registers */
1788 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1789 param != 0; param = next_param)
1791 next_param = TREE_CHAIN (param);
1792 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1799 cum->maybe_vaarg = true;
1803 if ((!fntype && !libname)
1804 || (fntype && !TYPE_ARG_TYPES (fntype)))
1805 cum->maybe_vaarg = 1;
1807 if (TARGET_DEBUG_ARG)
1808 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1813 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1814 of this code is to classify each 8bytes of incoming argument by the register
1815 class and assign registers accordingly. */
1817 /* Return the union class of CLASS1 and CLASS2.
1818 See the x86-64 PS ABI for details. */
1820 static enum x86_64_reg_class
1821 merge_classes (class1, class2)
1822 enum x86_64_reg_class class1, class2;
1824 /* Rule #1: If both classes are equal, this is the resulting class. */
1825 if (class1 == class2)
1828 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1830 if (class1 == X86_64_NO_CLASS)
1832 if (class2 == X86_64_NO_CLASS)
1835 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1836 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1837 return X86_64_MEMORY_CLASS;
1839 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1840 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1841 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1842 return X86_64_INTEGERSI_CLASS;
1843 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1844 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1845 return X86_64_INTEGER_CLASS;
1847 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1848 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1849 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1850 return X86_64_MEMORY_CLASS;
1852 /* Rule #6: Otherwise class SSE is used. */
1853 return X86_64_SSE_CLASS;
1856 /* Classify the argument of type TYPE and mode MODE.
1857 CLASSES will be filled by the register class used to pass each word
1858 of the operand. The number of words is returned. In case the parameter
1859 should be passed in memory, 0 is returned. As a special case for zero
1860 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1862 BIT_OFFSET is used internally for handling records and specifies offset
1863 of the offset in bits modulo 256 to avoid overflow cases.
1865 See the x86-64 PS ABI for details.
/* NOTE(review): this listing is gappy -- the embedded source line numbers are
   non-contiguous, so the return type, several local declarations, braces and
   switch labels of this function are not visible here.  The comments below
   describe only the statements actually shown.  */
1869 classify_argument (mode, type, classes, bit_offset)
1870 enum machine_mode mode;
1872 enum x86_64_reg_class classes[MAX_CLASSES];
/* Size of the argument in bytes; BLKmode aggregates use the tree size.  */
1876 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 64-bit words occupied, accounting for the sub-word bit offset.  */
1877 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1879 /* Variable sized entities are always passed/returned in memory. */
1883 if (type && AGGREGATE_TYPE_P (type))
1887 enum x86_64_reg_class subclasses[MAX_CLASSES];
1889 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start every word as NO_CLASS; fields are merged in below.  */
1893 for (i = 0; i < words; i++)
1894 classes[i] = X86_64_NO_CLASS;
1896 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1897 signalize memory class, so handle it as special case. */
1900 classes[0] = X86_64_NO_CLASS;
1904 /* Classify each field of record and merge classes. */
1905 if (TREE_CODE (type) == RECORD_TYPE)
1907 /* For classes first merge in the field of the subclasses. */
1908 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1910 tree bases = TYPE_BINFO_BASETYPES (type);
1911 int n_bases = TREE_VEC_LENGTH (bases);
/* Recursively classify each C++ base class at its byte offset and fold
   the result into CLASSES word by word.  */
1914 for (i = 0; i < n_bases; ++i)
1916 tree binfo = TREE_VEC_ELT (bases, i);
1918 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1919 tree type = BINFO_TYPE (binfo);
1921 num = classify_argument (TYPE_MODE (type),
1923 (offset + bit_offset) % 256);
1926 for (i = 0; i < num; i++)
1928 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1930 merge_classes (subclasses[i], classes[i + pos]);
1934 /* And now merge the fields of structure. */
1935 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1937 if (TREE_CODE (field) == FIELD_DECL)
1941 /* Bitfields are always classified as integer. Handle them
1942 early, since later code would consider them to be
1943 misaligned integers. */
1944 if (DECL_BIT_FIELD (field))
1946 for (i = int_bit_position (field) / 8 / 8;
1947 i < (int_bit_position (field)
1948 + tree_low_cst (DECL_SIZE (field), 0)
1951 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield member: classify recursively at its bit position.  */
1956 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1957 TREE_TYPE (field), subclasses,
1958 (int_bit_position (field)
1959 + bit_offset) % 256);
1962 for (i = 0; i < num; i++)
1965 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1967 merge_classes (subclasses[i], classes[i + pos]);
1973 /* Arrays are handled as small records. */
1974 else if (TREE_CODE (type) == ARRAY_TYPE)
1977 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1978 TREE_TYPE (type), subclasses, bit_offset);
1982 /* The partial classes are now full classes. */
1983 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1984 subclasses[0] = X86_64_SSE_CLASS;
1985 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1986 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across every word of the array.  */
1988 for (i = 0; i < words; i++)
1989 classes[i] = subclasses[i % num];
1991 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1992 else if (TREE_CODE (type) == UNION_TYPE
1993 || TREE_CODE (type) == QUAL_UNION_TYPE)
1995 /* For classes first merge in the field of the subclasses. */
1996 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1998 tree bases = TYPE_BINFO_BASETYPES (type);
1999 int n_bases = TREE_VEC_LENGTH (bases);
2002 for (i = 0; i < n_bases; ++i)
2004 tree binfo = TREE_VEC_ELT (bases, i);
2006 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2007 tree type = BINFO_TYPE (binfo);
2009 num = classify_argument (TYPE_MODE (type),
2011 (offset + (bit_offset % 64)) % 256);
2014 for (i = 0; i < num; i++)
2016 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2018 merge_classes (subclasses[i], classes[i + pos]);
/* Union members all live at offset 0, so merge each member's words
   directly over classes[0..num-1].  */
2022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2024 if (TREE_CODE (field) == FIELD_DECL)
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2028 TREE_TYPE (field), subclasses,
2032 for (i = 0; i < num; i++)
2033 classes[i] = merge_classes (subclasses[i], classes[i]);
2040 /* Final merger cleanup. */
2041 for (i = 0; i < words; i++)
2043 /* If one class is MEMORY, everything should be passed in
2045 if (classes[i] == X86_64_MEMORY_CLASS)
2048 /* The X86_64_SSEUP_CLASS should be always preceded by
2049 X86_64_SSE_CLASS. */
2050 if (classes[i] == X86_64_SSEUP_CLASS
2051 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2052 classes[i] = X86_64_SSE_CLASS;
2054 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2055 if (classes[i] == X86_64_X87UP_CLASS
2056 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2057 classes[i] = X86_64_SSE_CLASS;
2062 /* Compute alignment needed. We align all types to natural boundaries with
2063 exception of XFmode that is aligned to 64bits. */
2064 if (mode != VOIDmode && mode != BLKmode)
2066 int mode_alignment = GET_MODE_BITSIZE (mode);
2069 mode_alignment = 128;
2070 else if (mode == XCmode)
2071 mode_alignment = 256;
2072 /* Misaligned fields are always returned in memory. */
2073 if (bit_offset % mode_alignment)
2077 /* Classification of atomic types. */
/* NOTE(review): the switch over MODE that selects among the cases below is
   not visible in this listing; the assignments are grouped per mode case
   (integer modes, SF/DF, x87 long double, complex, SSE vectors).  */
2087 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2088 classes[0] = X86_64_INTEGERSI_CLASS;
2090 classes[0] = X86_64_INTEGER_CLASS;
2094 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2097 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2098 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2101 if (!(bit_offset % 64))
2102 classes[0] = X86_64_SSESF_CLASS;
2104 classes[0] = X86_64_SSE_CLASS;
2107 classes[0] = X86_64_SSEDF_CLASS;
2110 classes[0] = X86_64_X87_CLASS;
2111 classes[1] = X86_64_X87UP_CLASS;
2114 classes[0] = X86_64_X87_CLASS;
2115 classes[1] = X86_64_X87UP_CLASS;
2116 classes[2] = X86_64_X87_CLASS;
2117 classes[3] = X86_64_X87UP_CLASS;
2120 classes[0] = X86_64_SSEDF_CLASS;
2121 classes[1] = X86_64_SSEDF_CLASS;
2124 classes[0] = X86_64_SSE_CLASS;
2132 classes[0] = X86_64_SSE_CLASS;
2133 classes[1] = X86_64_SSEUP_CLASS;
2148 /* Examine the argument and return set number of register required in each
2149 class. Return 0 iff parameter should be passed in memory. */
/* Counts how many integer and SSE registers the argument needs by running
   classify_argument and tallying each word's class into *INT_NREGS and
   *SSE_NREGS (increments not visible in this gappy listing -- confirm).  */
2151 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2152 enum machine_mode mode;
2154 int *int_nregs, *sse_nregs;
2157 enum x86_64_reg_class class[MAX_CLASSES];
2158 int n = classify_argument (mode, type, class, 0);
/* Walk the per-word classes from last to first; the enclosing switch body
   is only partially visible here.  */
2164 for (n--; n >= 0; n--)
2167 case X86_64_INTEGER_CLASS:
2168 case X86_64_INTEGERSI_CLASS:
2171 case X86_64_SSE_CLASS:
2172 case X86_64_SSESF_CLASS:
2173 case X86_64_SSEDF_CLASS:
2176 case X86_64_NO_CLASS:
2177 case X86_64_SSEUP_CLASS:
2179 case X86_64_X87_CLASS:
2180 case X86_64_X87UP_CLASS:
2184 case X86_64_MEMORY_CLASS:
2189 /* Construct container for the argument used by GCC interface. See
2190 FUNCTION_ARG for the detailed description. */
/* Builds the RTL (single REG or PARALLEL of EXPR_LISTs) describing where an
   argument classified by classify_argument is passed.  Returns NULL for
   memory-class or zero-sized arguments (the visible code paths suggest this;
   the return statements themselves are not all visible -- confirm).  */
2192 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2193 enum machine_mode mode;
2196 int nintregs, nsseregs;
2200 enum machine_mode tmpmode;
2202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2203 enum x86_64_reg_class class[MAX_CLASSES];
2207 int needed_sseregs, needed_intregs;
2208 rtx exp[MAX_CLASSES];
2211 n = classify_argument (mode, type, class, 0);
/* Optional debug dump of the computed classification.  */
2212 if (TARGET_DEBUG_ARG)
2215 fprintf (stderr, "Memory class\n");
2218 fprintf (stderr, "Classes:");
2219 for (i = 0; i < n; i++)
2221 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2223 fprintf (stderr, "\n");
/* Bail out when the argument goes to memory or does not fit in the
   remaining registers.  */
2228 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2230 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2233 /* First construct simple cases. Avoid SCmode, since we want to use
2234 single register to pass this type. */
2235 if (n == 1 && mode != SCmode)
2238 case X86_64_INTEGER_CLASS:
2239 case X86_64_INTEGERSI_CLASS:
2240 return gen_rtx_REG (mode, intreg[0]);
2241 case X86_64_SSE_CLASS:
2242 case X86_64_SSESF_CLASS:
2243 case X86_64_SSEDF_CLASS:
2244 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2245 case X86_64_X87_CLASS:
2246 return gen_rtx_REG (mode, FIRST_STACK_REG);
2247 case X86_64_NO_CLASS:
2248 /* Zero sized array, struct or class. */
/* Two-word special cases that still fit a single hard register pair.  */
2253 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2254 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2256 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2257 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2258 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2259 && class[1] == X86_64_INTEGER_CLASS
2260 && (mode == CDImode || mode == TImode)
2261 && intreg[0] + 1 == intreg[1])
2262 return gen_rtx_REG (mode, intreg[0]);
2264 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2265 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2266 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2268 /* Otherwise figure out the entries of the PARALLEL. */
2269 for (i = 0; i < n; i++)
2273 case X86_64_NO_CLASS:
2275 case X86_64_INTEGER_CLASS:
2276 case X86_64_INTEGERSI_CLASS:
2277 /* Merge TImodes on aligned occasions here too. */
2278 if (i * 8 + 8 > bytes)
2279 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2280 else if (class[i] == X86_64_INTEGERSI_CLASS)
2284 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2285 if (tmpmode == BLKmode)
2287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2288 gen_rtx_REG (tmpmode, *intreg),
2292 case X86_64_SSESF_CLASS:
2293 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2294 gen_rtx_REG (SFmode,
2295 SSE_REGNO (sse_regno)),
2299 case X86_64_SSEDF_CLASS:
2300 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2301 gen_rtx_REG (DFmode,
2302 SSE_REGNO (sse_regno)),
2306 case X86_64_SSE_CLASS:
2307 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2311 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2312 gen_rtx_REG (tmpmode,
2313 SSE_REGNO (sse_regno)),
2315 if (tmpmode == TImode)
/* Wrap the collected EXPR_LISTs into the final PARALLEL.  */
2323 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2324 for (i = 0; i < nexps; i++)
2325 XVECEXP (ret, 0, i) = exp [i];
2329 /* Update the data in CUM to advance over an argument
2330 of mode MODE and data type TYPE.
2331 (TYPE is null for libcalls where that information may not be available.) */
2334 function_arg_advance (cum, mode, type, named)
2335 CUMULATIVE_ARGS *cum; /* current arg information */
2336 enum machine_mode mode; /* current arg mode */
2337 tree type; /* type of the argument or 0 if lib support */
2338 int named; /* whether or not the argument was named */
2341 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2342 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2344 if (TARGET_DEBUG_ARG)
2346 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2347 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* x86-64 path: consume the int/SSE registers the argument was classified
   into, or fall back to stack words when it does not fit.  */
2350 int int_nregs, sse_nregs;
2351 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2352 cum->words += words;
2353 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2355 cum->nregs -= int_nregs;
2356 cum->sse_nregs -= sse_nregs;
2357 cum->regno += int_nregs;
2358 cum->sse_regno += sse_nregs;
2361 cum->words += words;
/* 32-bit path: TImode vectors go in SSE registers...  */
2365 if (TARGET_SSE && mode == TImode)
2367 cum->sse_words += words;
2368 cum->sse_nregs -= 1;
2369 cum->sse_regno += 1;
2370 if (cum->sse_nregs <= 0)
/* ... everything else consumes general registers / stack words.  */
2378 cum->words += words;
2379 cum->nregs -= words;
2380 cum->regno += words;
2382 if (cum->nregs <= 0)
2392 /* Define where to put the arguments to a function.
2393 Value is zero to push the argument on the stack,
2394 or a hard register in which to store the argument.
2396 MODE is the argument's machine mode.
2397 TYPE is the data type of the argument (as a tree).
2398 This is null for libcalls where that information may
2400 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2401 the preceding args and about the function being called.
2402 NAMED is nonzero if this argument is a named parameter
2403 (otherwise it is an extra parameter matching an ellipsis). */
2406 function_arg (cum, mode, type, named)
2407 CUMULATIVE_ARGS *cum; /* current arg information */
2408 enum machine_mode mode; /* current arg mode */
2409 tree type; /* type of the argument or 0 if lib support */
2410 int named; /* != 0 for normal args, == 0 for ... args */
2414 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2415 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2417 /* Handle a hidden AL argument containing number of registers for varargs
2418 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2420 if (mode == VOIDmode)
2423 return GEN_INT (cum->maybe_vaarg
2424 ? (cum->sse_nregs < 0
/* x86-64: let construct_container pick the register(s) or NULL (stack).  */
2432 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2433 &x86_64_int_parameter_registers [cum->regno],
2438 /* For now, pass fp/complex values on the stack. */
/* 32-bit register-passing path (regparm/fastcall).  */
2447 if (words <= cum->nregs)
2449 int regno = cum->regno;
2451 /* Fastcall allocates the first two DWORD (SImode) or
2452 smaller arguments to ECX and EDX. */
2455 if (mode == BLKmode || mode == DImode)
2458 /* ECX not EAX is the first allocated register. */
2462 ret = gen_rtx_REG (mode, regno);
2467 ret = gen_rtx_REG (mode, cum->sse_regno);
2471 if (TARGET_DEBUG_ARG)
2474 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2475 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2478 print_simple_rtl (stderr, ret);
2480 fprintf (stderr, ", stack");
2482 fprintf (stderr, " )\n");
2488 /* Gives the alignment boundary, in bits, of an argument with the specified mode
/* Returns the argument's alignment: the type's (or mode's) natural alignment,
   but never below PARM_BOUNDARY.  An upper clamp may exist in lines not
   visible here -- confirm against the full source.  */
2492 ix86_function_arg_boundary (mode, type)
2493 enum machine_mode mode;
2498 return PARM_BOUNDARY;
2500 align = TYPE_ALIGN (type);
2502 align = GET_MODE_ALIGNMENT (mode);
2503 if (align < PARM_BOUNDARY)
2504 align = PARM_BOUNDARY;
2510 /* Return true if N is a possible register number of function value. */
2512 ix86_function_value_regno_p (regno)
/* First visible arm: return values may live in AX, the first x87 stack
   register (when the ABI returns floats there), or the first SSE register.  */
2517 return ((regno) == 0
2518 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2519 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
/* Second visible arm (the selecting condition, likely TARGET_64BIT, is not
   visible -- confirm): also accepts FIRST_FLOAT_REG unconditionally.  */
2521 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2522 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2523 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2526 /* Define how to find the value returned by a function.
2527 VALTYPE is the data type of the value (as a tree).
2528 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2529 otherwise, FUNC is 0. */
2531 ix86_function_value (valtype)
/* x86-64: build the return-register container from the full return-register
   sets (REGPARM_MAX integer regs starting at x86_64_int_return_registers).  */
2536 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2537 REGPARM_MAX, SSE_REGPARM_MAX,
2538 x86_64_int_return_registers, 0);
2539 /* For zero sized structures, construct_container return NULL, but we need
2540 to keep rest of compiler happy by returning meaningful value. */
2542 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* 32-bit: single register chosen by ix86_value_regno.  */
2546 return gen_rtx_REG (TYPE_MODE (valtype),
2547 ix86_value_regno (TYPE_MODE (valtype)));
2550 /* Return false iff type is returned in memory. */
2552 ix86_return_in_memory (type)
2555 int needed_intregs, needed_sseregs;
/* x86-64: in memory exactly when examine_argument says no registers fit.  */
2558 return !examine_argument (TYPE_MODE (type), type, 1,
2559 &needed_intregs, &needed_sseregs);
/* 32-bit heuristic: BLKmode, 8-byte vectors, and large non-vector types
   (>12 bytes, excluding TImode/TFmode) go to memory.  */
2563 if (TYPE_MODE (type) == BLKmode
2564 || (VECTOR_MODE_P (TYPE_MODE (type))
2565 && int_size_in_bytes (type) == 8)
2566 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2567 && TYPE_MODE (type) != TFmode
2568 && !VECTOR_MODE_P (TYPE_MODE (type))))
2574 /* Define how to find the value returned by a library function
2575 assuming the value has mode MODE. */
2577 ix86_libcall_value (mode)
2578 enum machine_mode mode;
/* x86-64 (selecting switch over MODE not fully visible): SSE modes return in
   the first SSE register, x87 modes on the FP stack, the rest in RAX.  */
2588 return gen_rtx_REG (mode, FIRST_SSE_REG);
2591 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2593 return gen_rtx_REG (mode, 0);
/* 32-bit: defer to ix86_value_regno.  */
2597 return gen_rtx_REG (mode, ix86_value_regno (mode));
2600 /* Given a mode, return the register to use for a return value. */
2603 ix86_value_regno (mode)
2604 enum machine_mode mode;
/* Floats on the x87 stack when the ABI says so, TImode/vectors in SSE,
   everything else falls through (to AX -- final return not visible).  */
2606 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2607 return FIRST_FLOAT_REG;
2608 if (mode == TImode || VECTOR_MODE_P (mode))
2609 return FIRST_SSE_REG;
2613 /* Create the va_list data type. */
2616 ix86_build_va_list ()
2618 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2620 /* For i386 we use plain pointer to argument area. */
2622 return build_pointer_type (char_type_node);
/* x86-64 va_list: struct __va_list_tag { unsigned gp_offset, fp_offset;
   void *overflow_arg_area, *reg_save_area; } -- matching the psABI.  */
2624 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2625 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2627 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2628 unsigned_type_node);
2629 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2630 unsigned_type_node);
2631 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2633 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Attach the fields to the record and chain them in declaration order.  */
2636 DECL_FIELD_CONTEXT (f_gpr) = record;
2637 DECL_FIELD_CONTEXT (f_fpr) = record;
2638 DECL_FIELD_CONTEXT (f_ovf) = record;
2639 DECL_FIELD_CONTEXT (f_sav) = record;
2641 TREE_CHAIN (record) = type_decl;
2642 TYPE_NAME (record) = type_decl;
2643 TYPE_FIELDS (record) = f_gpr;
2644 TREE_CHAIN (f_gpr) = f_fpr;
2645 TREE_CHAIN (f_fpr) = f_ovf;
2646 TREE_CHAIN (f_ovf) = f_sav;
2648 layout_type (record);
2650 /* The correct type is an array type of one element. */
2651 return build_array_type (record, build_index_type (size_zero_node));
2654 /* Perform any needed actions needed for a function that is receiving a
2655 variable number of arguments.
2659 MODE and TYPE are the mode and type of the current parameter.
2661 PRETEND_SIZE is a variable that should be set to the amount of stack
2662 that must be pushed by the prolog to pretend that our caller pushed
2665 Normally, this macro will push all remaining incoming registers on the
2666 stack and set PRETEND_SIZE to the length of the registers pushed. */
2669 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2670 CUMULATIVE_ARGS *cum;
2671 enum machine_mode mode;
2673 int *pretend_size ATTRIBUTE_UNUSED;
2677 CUMULATIVE_ARGS next_cum;
2678 rtx save_area = NULL_RTX, mem;
2691 /* Indicate to allocate space on the stack for varargs save area. */
2692 ix86_save_varrargs_registers = 1;
/* Detect stdarg (prototype ending in ...) vs. old-style varargs.  */
2694 fntype = TREE_TYPE (current_function_decl);
2695 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2696 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2697 != void_type_node));
2699 /* For varargs, we do not want to skip the dummy va_dcl argument.
2700 For stdargs, we do want to skip the last named argument. */
2703 function_arg_advance (&next_cum, mode, type, 1);
2706 save_area = frame_pointer_rtx;
2708 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer-parameter registers into the save
   area, one word per register.  */
2710 for (i = next_cum.regno; i < ix86_regparm; i++)
2712 mem = gen_rtx_MEM (Pmode,
2713 plus_constant (save_area, i * UNITS_PER_WORD));
2714 set_mem_alias_set (mem, set);
2715 emit_move_insn (mem, gen_rtx_REG (Pmode,
2716 x86_64_int_parameter_registers[i]));
2719 if (next_cum.sse_nregs)
2721 /* Now emit code to save SSE registers. The AX parameter contains number
2722 of SSE parameter registers used to call this function. We use
2723 sse_prologue_save insn template that produces computed jump across
2724 SSE saves. We need some preparation work to get this working. */
2726 label = gen_label_rtx ();
2727 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2729 /* Compute address to jump to :
2730 label - 5*eax + nnamed_sse_arguments*5 */
2731 tmp_reg = gen_reg_rtx (Pmode);
2732 nsse_reg = gen_reg_rtx (Pmode);
/* AL holds the caller's count of SSE registers used; widen it to 64 bits.  */
2733 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2734 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2735 gen_rtx_MULT (Pmode, nsse_reg,
2737 if (next_cum.sse_regno)
2740 gen_rtx_CONST (DImode,
2741 gen_rtx_PLUS (DImode,
2743 GEN_INT (next_cum.sse_regno * 4))));
2745 emit_move_insn (nsse_reg, label_ref);
2746 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2748 /* Compute address of memory block we save into. We always use pointer
2749 pointing 127 bytes after first byte to store - this is needed to keep
2750 instruction size limited by 4 bytes. */
2751 tmp_reg = gen_reg_rtx (Pmode);
2752 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2753 plus_constant (save_area,
2754 8 * REGPARM_MAX + 127)));
2755 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2756 set_mem_alias_set (mem, set);
2757 set_mem_align (mem, BITS_PER_WORD);
2759 /* And finally do the dirty job! */
2760 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2761 GEN_INT (next_cum.sse_regno), label));
2766 /* Implement va_start. */
2769 ix86_va_start (valist, nextarg)
2773 HOST_WIDE_INT words, n_gpr, n_fpr;
2774 tree f_gpr, f_fpr, f_ovf, f_sav;
2775 tree gpr, fpr, ovf, sav, t;
2777 /* Only 64bit target needs something special. */
2780 std_expand_builtin_va_start (valist, nextarg);
/* Pick apart the four fields of the x86-64 __va_list_tag record built by
   ix86_build_va_list (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area).  */
2784 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2785 f_fpr = TREE_CHAIN (f_gpr);
2786 f_ovf = TREE_CHAIN (f_fpr);
2787 f_sav = TREE_CHAIN (f_ovf);
2789 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2790 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2791 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2792 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2793 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2795 /* Count number of gp and fp argument registers used. */
2796 words = current_function_args_info.words;
2797 n_gpr = current_function_args_info.regno;
2798 n_fpr = current_function_args_info.sse_regno;
2800 if (TARGET_DEBUG_ARG)
2801 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2802 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = 8 bytes per consumed integer register.  */
2804 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2805 build_int_2 (n_gpr * 8, 0));
2806 TREE_SIDE_EFFECTS (t) = 1;
2807 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = 16 bytes per SSE register, after the REGPARM_MAX*8-byte
   integer-register area.  */
2809 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2810 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2811 TREE_SIDE_EFFECTS (t) = 1;
2812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2814 /* Find the overflow area. */
2815 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2817 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2818 build_int_2 (words * UNITS_PER_WORD, 0));
2819 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2820 TREE_SIDE_EFFECTS (t) = 1;
2821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2823 /* Find the register save area.
2824 Prologue of the function save it right above stack frame. */
2825 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2826 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2827 TREE_SIDE_EFFECTS (t) = 1;
2828 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2831 /* Implement va_arg. */
/* Emits RTL that fetches the next argument of TYPE from an x86-64 va_list:
   from the register save area when it still fits, otherwise from the
   overflow (stack) area.  Returns an address register (final return not
   visible in this gappy listing -- confirm).  */
2833 ix86_va_arg (valist, type)
2836 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2837 tree f_gpr, f_fpr, f_ovf, f_sav;
2838 tree gpr, fpr, ovf, sav, t;
2840 rtx lab_false, lab_over = NULL_RTX;
2844 /* Only 64bit target needs something special. */
2847 return std_expand_builtin_va_arg (valist, type);
/* Decompose the __va_list_tag record, as in ix86_va_start.  */
2850 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2851 f_fpr = TREE_CHAIN (f_gpr);
2852 f_ovf = TREE_CHAIN (f_fpr);
2853 f_sav = TREE_CHAIN (f_ovf);
2855 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2856 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2857 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2858 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2859 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2861 size = int_size_in_bytes (type);
2862 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Ask the ABI machinery where this type would be passed.  NULL container
   means it is passed in memory (registers never used).  */
2864 container = construct_container (TYPE_MODE (type), type, 0,
2865 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2867 * Pull the value out of the saved registers ...
2870 addr_rtx = gen_reg_rtx (Pmode);
2874 rtx int_addr_rtx, sse_addr_rtx;
2875 int needed_intregs, needed_sseregs;
2878 lab_over = gen_label_rtx ();
2879 lab_false = gen_label_rtx ();
2881 examine_argument (TYPE_MODE (type), type, 0,
2882 &needed_intregs, &needed_sseregs);
/* Over-aligned types cannot be read in place from the save area.  */
2885 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2886 || TYPE_ALIGN (type) > 128);
2888 /* In case we are passing structure, verify that it is consecutive block
2889 on the register save area. If not we need to do moves. */
2890 if (!need_temp && !REG_P (container))
2892 /* Verify that all registers are strictly consecutive */
2893 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2897 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2899 rtx slot = XVECEXP (container, 0, i);
2900 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2901 || INTVAL (XEXP (slot, 1)) != i * 16)
2909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2911 rtx slot = XVECEXP (container, 0, i);
2912 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2913 || INTVAL (XEXP (slot, 1)) != i * 8)
2920 int_addr_rtx = addr_rtx;
2921 sse_addr_rtx = addr_rtx;
2925 int_addr_rtx = gen_reg_rtx (Pmode);
2926 sse_addr_rtx = gen_reg_rtx (Pmode);
2928 /* First ensure that we fit completely in registers. */
/* Branch to lab_false (stack path) if gp_offset / fp_offset show that not
   enough registers remain in the save area.  */
2931 emit_cmp_and_jump_insns (expand_expr
2932 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2933 GEN_INT ((REGPARM_MAX - needed_intregs +
2934 1) * 8), GE, const1_rtx, SImode,
2939 emit_cmp_and_jump_insns (expand_expr
2940 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2941 GEN_INT ((SSE_REGPARM_MAX -
2942 needed_sseregs + 1) * 16 +
2943 REGPARM_MAX * 8), GE, const1_rtx,
2944 SImode, 1, lab_false);
2947 /* Compute index to start of area used for integer regs. */
2950 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2951 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2952 if (r != int_addr_rtx)
2953 emit_move_insn (int_addr_rtx, r);
2957 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2958 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2959 if (r != sse_addr_rtx)
2960 emit_move_insn (sse_addr_rtx, r);
/* Non-consecutive case: copy each register slot into a stack temporary.  */
2967 /* Never use the memory itself, as it has the alias set. */
2968 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2969 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2970 set_mem_alias_set (mem, get_varargs_alias_set ());
2971 set_mem_align (mem, BITS_PER_UNIT);
2973 for (i = 0; i < XVECLEN (container, 0); i++)
2975 rtx slot = XVECEXP (container, 0, i);
2976 rtx reg = XEXP (slot, 0);
2977 enum machine_mode mode = GET_MODE (reg);
2983 if (SSE_REGNO_P (REGNO (reg)))
2985 src_addr = sse_addr_rtx;
2986 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2990 src_addr = int_addr_rtx;
2991 src_offset = REGNO (reg) * 8;
2993 src_mem = gen_rtx_MEM (mode, src_addr);
2994 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2995 src_mem = adjust_address (src_mem, mode, src_offset);
2996 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2997 emit_move_insn (dest_mem, src_mem);
/* Bump gp_offset / fp_offset past the registers just consumed.  */
3004 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3005 build_int_2 (needed_intregs * 8, 0));
3006 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3007 TREE_SIDE_EFFECTS (t) = 1;
3008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3013 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3014 build_int_2 (needed_sseregs * 16, 0));
3015 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3016 TREE_SIDE_EFFECTS (t) = 1;
3017 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3020 emit_jump_insn (gen_jump (lab_over));
3022 emit_label (lab_false);
3025 /* ... otherwise out of the overflow area. */
3027 /* Care for on-stack alignment if needed. */
3028 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
/* Round the overflow pointer up to the argument's boundary.  */
3032 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3033 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3034 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3038 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3040 emit_move_insn (addr_rtx, r);
/* Advance overflow_arg_area past this argument.  */
3043 build (PLUS_EXPR, TREE_TYPE (t), t,
3044 build_int_2 (rsize * UNITS_PER_WORD, 0));
3045 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3046 TREE_SIDE_EFFECTS (t) = 1;
3047 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3050 emit_label (lab_over);
3055 /* Return nonzero if OP is either a i387 or SSE fp register. */
/* Predicate; MODE is ignored (ATTRIBUTE_UNUSED).  */
3057 any_fp_register_operand (op, mode)
3059 enum machine_mode mode ATTRIBUTE_UNUSED;
3061 return ANY_FP_REG_P (op);
3064 /* Return nonzero if OP is an i387 fp register. */
/* Predicate; MODE is ignored (ATTRIBUTE_UNUSED).  */
3066 fp_register_operand (op, mode)
3068 enum machine_mode mode ATTRIBUTE_UNUSED;
3070 return FP_REG_P (op);
3073 /* Return nonzero if OP is a non-fp register_operand. */
/* True for register operands that are neither i387 nor SSE registers.  */
3075 register_and_not_any_fp_reg_operand (op, mode)
3077 enum machine_mode mode;
3079 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3082 /* Return nonzero if OP is a register operand other than an
3083 i387 fp register. */
/* Unlike register_and_not_any_fp_reg_operand, SSE registers are allowed.  */
3085 register_and_not_fp_reg_operand (op, mode)
3087 enum machine_mode mode;
3089 return register_operand (op, mode) && !FP_REG_P (op);
3092 /* Return nonzero if OP is general operand representable on x86_64. */
/* On 32-bit targets falls back to general_operand; on x86-64 immediates
   must additionally be 32-bit sign-extendable.  */
3095 x86_64_general_operand (op, mode)
3097 enum machine_mode mode;
3100 return general_operand (op, mode);
3101 if (nonimmediate_operand (op, mode))
3103 return x86_64_sign_extended_value (op);
3106 /* Return nonzero if OP is general operand representable on x86_64
3107 as either sign extended or zero extended constant. */
3110 x86_64_szext_general_operand (op, mode)
3112 enum machine_mode mode;
3115 return general_operand (op, mode);
3116 if (nonimmediate_operand (op, mode))
/* Accept immediates that fit either as sign- or zero-extended 32 bits.  */
3118 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3121 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
/* Registers always qualify; immediates must be 32-bit sign-extendable.  */
3124 x86_64_nonmemory_operand (op, mode)
3126 enum machine_mode mode;
3129 return nonmemory_operand (op, mode);
3130 if (register_operand (op, mode))
3132 return x86_64_sign_extended_value (op);
3135 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3138 x86_64_movabs_operand (op, mode)
3140 enum machine_mode mode;
/* Without PIC any nonmemory operand works; under PIC, symbolic constants
   must be excluded because movabs cannot be relocated.  */
3142 if (!TARGET_64BIT || !flag_pic)
3143 return nonmemory_operand (op, mode);
3144 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3146 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3151 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
/* Like x86_64_nonmemory_operand but also accepts zero-extended immediates.  */
3154 x86_64_szext_nonmemory_operand (op, mode)
3156 enum machine_mode mode;
3159 return nonmemory_operand (op, mode);
3160 if (register_operand (op, mode))
3162 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3165 /* Return nonzero if OP is immediate operand representable on x86_64. */
/* 32-bit targets accept any immediate; x86-64 requires sign-extendability.  */
3168 x86_64_immediate_operand (op, mode)
3170 enum machine_mode mode;
3173 return immediate_operand (op, mode);
3174 return x86_64_sign_extended_value (op);
3177 /* Return nonzero if OP is immediate operand representable on x86_64. */
/* Zero-extended variant: the immediate must fit in 32 unsigned bits.  */
3180 x86_64_zext_immediate_operand (op, mode)
3182 enum machine_mode mode ATTRIBUTE_UNUSED;
3184 return x86_64_zero_extended_value (op);
3187 /* Return nonzero if OP is (const_int 1), else return zero. */
/* Used by patterns that only allow a shift/rotate count of exactly 1.  */
3190 const_int_1_operand (op, mode)
3192 enum machine_mode mode ATTRIBUTE_UNUSED;
3194 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3197 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3198 for shift & compare patterns, as shifting by 0 does not change flags),
3199 else return zero. */
3202 const_int_1_31_operand (op, mode)
3204 enum machine_mode mode ATTRIBUTE_UNUSED;
3206 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3209 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3210 reference and a constant. */
3213 symbolic_operand (op, mode)
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Dispatch on the RTX code; only part of the switch is visible here.  */
3217 switch (GET_CODE (op))
/* Inside CONST: accept a bare symbol/label or a PIC unspec wrapper ...  */
3225 if (GET_CODE (op) == SYMBOL_REF
3226 || GET_CODE (op) == LABEL_REF
3227 || (GET_CODE (op) == UNSPEC
3228 && (XINT (op, 1) == UNSPEC_GOT
3229 || XINT (op, 1) == UNSPEC_GOTOFF
3230 || XINT (op, 1) == UNSPEC_GOTPCREL)))
/* ... or symbol/label/@GOTOFF-unspec plus a CONST_INT offset.  */
3232 if (GET_CODE (op) != PLUS
3233 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3237 if (GET_CODE (op) == SYMBOL_REF
3238 || GET_CODE (op) == LABEL_REF)
3240 /* Only @GOTOFF gets offsets. */
3241 if (GET_CODE (op) != UNSPEC
3242 || XINT (op, 1) != UNSPEC_GOTOFF)
3245 op = XVECEXP (op, 0, 0);
3246 if (GET_CODE (op) == SYMBOL_REF
3247 || GET_CODE (op) == LABEL_REF)
3256 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3259 pic_symbolic_operand (op, mode)
3261 enum machine_mode mode ATTRIBUTE_UNUSED;
3263 if (GET_CODE (op) != CONST)
/* First branch (selecting condition, likely TARGET_64BIT, not visible):
   a direct UNSPEC under the CONST counts.  */
3268 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3273 if (GET_CODE (op) == UNSPEC)
/* Otherwise look through an UNSPEC+CONST_INT sum.  */
3275 if (GET_CODE (op) != PLUS
3276 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3279 if (GET_CODE (op) == UNSPEC)
3285 /* Return true if OP is a symbolic operand that resolves locally. */
3288 local_symbolic_operand (op, mode)
3290 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Strip a (const (plus sym const_int)) wrapper down to the symbol.  */
3292 if (GET_CODE (op) == CONST
3293 && GET_CODE (XEXP (op, 0)) == PLUS
3294 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3295 op = XEXP (XEXP (op, 0), 0);
3297 if (GET_CODE (op) == LABEL_REF)
3300 if (GET_CODE (op) != SYMBOL_REF)
3303 /* These we've been told are local by varasm and encode_section_info
3305 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3308 /* There is, however, a not insubstantial body of code in the rest of
3309 the compiler that assumes it can just stick the results of
3310 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3311 /* ??? This is a hack. Should update the body of the compiler to
3312 always create a DECL an invoke targetm.encode_section_info. */
3313 if (strncmp (XSTR (op, 0), internal_label_prefix,
3314 internal_label_prefix_len) == 0)
3320 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3323 tls_symbolic_operand (op, mode)
3325 enum machine_mode mode ATTRIBUTE_UNUSED;
3327 const char *symbol_str;
3329 if (GET_CODE (op) != SYMBOL_REF)
3331 symbol_str = XSTR (op, 0);
/* TLS symbols are encoded as "%<model-char><name>"; translate the model
   character back into its index in tls_model_chars.  */
3333 if (symbol_str[0] != '%')
3335 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
/* Helper: nonzero if OP is a SYMBOL_REF whose encoded name marks exactly
   the TLS model KIND.  */
3339 tls_symbolic_operand_1 (op, kind)
3341 enum tls_model kind;
3343 const char *symbol_str;
3345 if (GET_CODE (op) != SYMBOL_REF)
3347 symbol_str = XSTR (op, 0);
3349 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
/* Predicates for the four TLS access models, built on the helper above.  */
3353 global_dynamic_symbolic_operand (op, mode)
3355 enum machine_mode mode ATTRIBUTE_UNUSED;
3357 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3361 local_dynamic_symbolic_operand (op, mode)
3363 enum machine_mode mode ATTRIBUTE_UNUSED;
3365 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3369 initial_exec_symbolic_operand (op, mode)
3371 enum machine_mode mode ATTRIBUTE_UNUSED;
3373 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3377 local_exec_symbolic_operand (op, mode)
3379 enum machine_mode mode ATTRIBUTE_UNUSED;
3381 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3384 /* Test for a valid operand for a call instruction. Don't allow the
3385 arg pointer register or virtual regs since they may decay into
3386 reg + const, which the patterns can't handle. */
3389 call_insn_operand (op, mode)
3391 enum machine_mode mode ATTRIBUTE_UNUSED;
3393 /* Disallow indirect through a virtual register. This leads to
3394 compiler aborts when trying to eliminate them. */
3395 if (GET_CODE (op) == REG
3396 && (op == arg_pointer_rtx
3397 || op == frame_pointer_rtx
3398 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3399 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3402 /* Disallow `call 1234'. Due to varying assembler lameness this
3403 gets either rejected or translated to `call .+1234'. */
3404 if (GET_CODE (op) == CONST_INT)
3407 /* Explicitly allow SYMBOL_REF even if pic. */
3408 if (GET_CODE (op) == SYMBOL_REF)
3411 /* Otherwise we can allow any general_operand in the address. */
3412 return general_operand (op, Pmode);
3415 /* Test for a valid operand for a call instruction. Don't allow the
3416 arg pointer register or virtual regs since they may decay into
3417 reg + const, which the patterns can't handle. */
/* Sibcall variant: identical to call_insn_operand except that indirect
   calls must go through a register, not an arbitrary general operand.  */
3420 sibcall_insn_operand (op, mode)
3422 enum machine_mode mode ATTRIBUTE_UNUSED;
3424 /* Disallow indirect through a virtual register. This leads to
3425 compiler aborts when trying to eliminate them. */
3426 if (GET_CODE (op) == REG
3427 && (op == arg_pointer_rtx
3428 || op == frame_pointer_rtx
3429 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3430 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3433 /* Explicitly allow SYMBOL_REF even if pic. */
3434 if (GET_CODE (op) == SYMBOL_REF)
3437 /* Otherwise we can only allow register operands. */
3438 return register_operand (op, Pmode);
/* Return 1 if OP is a constant call address: a SYMBOL_REF, possibly
   wrapped in (const (plus SYMBOL_REF CONST_INT)).  */
3442 constant_call_address_operand (op, mode)
3444 enum machine_mode mode ATTRIBUTE_UNUSED;
3446 if (GET_CODE (op) == CONST
3447 && GET_CODE (XEXP (op, 0)) == PLUS
3448 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3449 op = XEXP (XEXP (op, 0), 0);
3450 return GET_CODE (op) == SYMBOL_REF;
3453 /* Match exactly zero and one. */
3456 const0_operand (op, mode)
3458 enum machine_mode mode;
3460 return op == CONST0_RTX (mode);
3464 const1_operand (op, mode)
3466 enum machine_mode mode ATTRIBUTE_UNUSED;
3468 return op == const1_rtx;
3471 /* Match 2, 4, or 8. Used for leal multiplicands. */
3474 const248_operand (op, mode)
3476 enum machine_mode mode ATTRIBUTE_UNUSED;
3478 return (GET_CODE (op) == CONST_INT
3479 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3482 /* True if this is a constant appropriate for an increment or decrement. */
3485 incdec_operand (op, mode)
3487 enum machine_mode mode ATTRIBUTE_UNUSED;
3489 /* On Pentium4, the inc and dec operations cause an extra dependency on flag
3490 registers, since carry flag is not set. */
3491 if (TARGET_PENTIUM4 && !optimize_size)
3493 return op == const1_rtx || op == constm1_rtx;
3496 /* Return nonzero if OP is acceptable as operand of DImode shift
3500 shiftdi_operand (op, mode)
3502 enum machine_mode mode ATTRIBUTE_UNUSED;
/* NOTE(review): the condition choosing between the two returns below
   (presumably a TARGET_64BIT test) is elided in this extract.  */
3505 return nonimmediate_operand (op, mode);
3507 return register_operand (op, mode);
3510 /* Return false if this is the stack pointer, or any other fake
3511 register eliminable to the stack pointer. Otherwise, this is
3514 This is used to prevent esp from being used as an index reg.
3515 Which would only happen in pathological cases. */
3518 reg_no_sp_operand (op, mode)
3520 enum machine_mode mode;
/* Look through a SUBREG so (subreg (sp)) is rejected as well.  */
3523 if (GET_CODE (t) == SUBREG)
3525 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3528 return register_operand (op, mode);
/* Return true if OP is an MMX register.  */
3532 mmx_reg_operand (op, mode)
3534 enum machine_mode mode ATTRIBUTE_UNUSED;
3536 return MMX_REG_P (op);
3539 /* Return false if this is any eliminable register. Otherwise
3543 general_no_elim_operand (op, mode)
3545 enum machine_mode mode;
3548 if (GET_CODE (t) == SUBREG)
3550 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3551 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3552 || t == virtual_stack_dynamic_rtx)
3555 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3556 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3559 return general_operand (op, mode);
3562 /* Return false if this is any eliminable register. Otherwise
3563 register_operand or const_int. */
3566 nonmemory_no_elim_operand (op, mode)
3568 enum machine_mode mode;
3571 if (GET_CODE (t) == SUBREG)
3573 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3574 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3575 || t == virtual_stack_dynamic_rtx)
3578 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3581 /* Return false if this is any eliminable register or stack register,
3582 otherwise work like register_operand. */
3585 index_register_operand (op, mode)
3587 enum machine_mode mode;
3590 if (GET_CODE (t) == SUBREG)
3594 if (t == arg_pointer_rtx
3595 || t == frame_pointer_rtx
3596 || t == virtual_incoming_args_rtx
3597 || t == virtual_stack_vars_rtx
3598 || t == virtual_stack_dynamic_rtx
3599 || REGNO (t) == STACK_POINTER_REGNUM)
3602 return general_operand (op, mode);
3605 /* Return true if op is a Q_REGS class register. */
3608 q_regs_operand (op, mode)
3610 enum machine_mode mode;
3612 if (mode != VOIDmode && GET_MODE (op) != mode)
3614 if (GET_CODE (op) == SUBREG)
3615 op = SUBREG_REG (op);
3616 return ANY_QI_REG_P (op);
3619 /* Return true if op is an flags register. */
3622 flags_reg_operand (op, mode)
3624 enum machine_mode mode;
3626 if (mode != VOIDmode && GET_MODE (op) != mode)
3628 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3631 /* Return true if op is a NON_Q_REGS class register. */
3634 non_q_regs_operand (op, mode)
3636 enum machine_mode mode;
3638 if (mode != VOIDmode && GET_MODE (op) != mode)
3640 if (GET_CODE (op) == SUBREG)
3641 op = SUBREG_REG (op);
3642 return NON_QI_REG_P (op);
/* Return true if OP is a constant-pool MEM holding a CONST_VECTOR whose
   elements other than the first are all zero (i.e. a scalar load that
   implicitly zero-extends into the vector).  */
3646 zero_extended_scalar_load_operand (op, mode)
3648 enum machine_mode mode ATTRIBUTE_UNUSED;
3651 if (GET_CODE (op) != MEM)
3653 op = maybe_get_pool_constant (op);
3656 if (GET_CODE (op) != CONST_VECTOR)
/* Element count = vector size / element size.  */
3659 (GET_MODE_SIZE (GET_MODE (op)) /
3660 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
/* Check every element except element 0 for being zero.  */
3661 for (n_elts--; n_elts > 0; n_elts--)
3663 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3664 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3670 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3673 sse_comparison_operator (op, mode)
3675 enum machine_mode mode ATTRIBUTE_UNUSED;
3677 enum rtx_code code = GET_CODE (op);
3680 /* Operations supported directly. */
3690 /* These are equivalent to ones above in non-IEEE comparisons. */
3697 return !TARGET_IEEE_FP;
3702 /* Return 1 if OP is a valid comparison operator in valid mode. */
3704 ix86_comparison_operator (op, mode)
3706 enum machine_mode mode;
3708 enum machine_mode inmode;
3709 enum rtx_code code = GET_CODE (op);
3710 if (mode != VOIDmode && GET_MODE (op) != mode)
3712 if (GET_RTX_CLASS (code) != '<')
3714 inmode = GET_MODE (XEXP (op, 0));
/* FP compares: only codes needing neither a bypass nor a second jump
   are directly representable.  */
3716 if (inmode == CCFPmode || inmode == CCFPUmode)
3718 enum rtx_code second_code, bypass_code;
3719 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3720 return (bypass_code == NIL && second_code == NIL);
3727 if (inmode == CCmode || inmode == CCGCmode
3728 || inmode == CCGOCmode || inmode == CCNOmode)
3731 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3732 if (inmode == CCmode)
3736 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3744 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3747 fcmov_comparison_operator (op, mode)
3749 enum machine_mode mode;
3751 enum machine_mode inmode;
3752 enum rtx_code code = GET_CODE (op);
3753 if (mode != VOIDmode && GET_MODE (op) != mode)
3755 if (GET_RTX_CLASS (code) != '<')
3757 inmode = GET_MODE (XEXP (op, 0));
3758 if (inmode == CCFPmode || inmode == CCFPUmode)
3760 enum rtx_code second_code, bypass_code;
3761 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3762 if (bypass_code != NIL || second_code != NIL)
3764 code = ix86_fp_compare_code_to_integer (code);
3766 /* i387 supports just limited amount of conditional codes. */
3769 case LTU: case GTU: case LEU: case GEU:
3770 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3773 case ORDERED: case UNORDERED:
3781 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3784 promotable_binary_operator (op, mode)
3786 enum machine_mode mode ATTRIBUTE_UNUSED;
3788 switch (GET_CODE (op))
3791 /* Modern CPUs have same latency for HImode and SImode multiply,
3792 but 386 and 486 do HImode multiply faster. */
3793 return ix86_cpu > PROCESSOR_I486;
3805 /* Nearly general operand, but accept any const_double, since we wish
3806 to be able to drop them into memory rather than have them get pulled
3810 cmp_fp_expander_operand (op, mode)
3812 enum machine_mode mode;
3814 if (mode != VOIDmode && mode != GET_MODE (op))
3816 if (GET_CODE (op) == CONST_DOUBLE)
3818 return general_operand (op, mode);
3821 /* Match an SI or HImode register for a zero_extract. */
3824 ext_register_operand (op, mode)
3826 enum machine_mode mode ATTRIBUTE_UNUSED;
3829 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3830 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3833 if (!register_operand (op, VOIDmode))
3836 /* Be careful to accept only registers having upper parts. */
3837 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3838 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3841 /* Return 1 if this is a valid binary floating-point operation.
3842 OP is the expression matched, and MODE is its mode. */
3845 binary_fp_operator (op, mode)
3847 enum machine_mode mode;
3849 if (mode != VOIDmode && mode != GET_MODE (op))
3852 switch (GET_CODE (op))
3858 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Return 1 if OP is a MULT rtx.  */
3866 mult_operator (op, mode)
3868 enum machine_mode mode ATTRIBUTE_UNUSED;
3870 return GET_CODE (op) == MULT;
/* Return 1 if OP is a DIV rtx.  */
3874 div_operator (op, mode)
3876 enum machine_mode mode ATTRIBUTE_UNUSED;
3878 return GET_CODE (op) == DIV;
/* Return 1 if OP is a commutative ('c') or plain binary ('2') rtx
   operator in MODE.  */
3882 arith_or_logical_operator (op, mode)
3884 enum machine_mode mode;
3886 return ((mode == VOIDmode || GET_MODE (op) == mode)
3887 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3888 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3891 /* Returns 1 if OP is memory operand with a displacement. */
3894 memory_displacement_operand (op, mode)
3896 enum machine_mode mode;
3898 struct ix86_address parts;
3900 if (! memory_operand (op, mode))
3903 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3906 return parts.disp != NULL_RTX;
3909 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3910 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3912 ??? It seems likely that this will only work because cmpsi is an
3913 expander, and no actual insns use this. */
3916 cmpsi_operand (op, mode)
3918 enum machine_mode mode;
3920 if (nonimmediate_operand (op, mode))
/* Also accept (and (zero_extract X 8 8) const_int) -- the form emitted
   by testqi_ext_ccno_0.  */
3923 if (GET_CODE (op) == AND
3924 && GET_MODE (op) == SImode
3925 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3926 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3927 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3928 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3929 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3930 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3936 /* Returns 1 if OP is memory operand that can not be represented by the
3940 long_memory_operand (op, mode)
3942 enum machine_mode mode;
3944 if (! memory_operand (op, mode))
3947 return memory_address_length (op) != 0;
3950 /* Return nonzero if the rtx is known aligned. */
3953 aligned_operand (op, mode)
3955 enum machine_mode mode;
3957 struct ix86_address parts;
3959 if (!general_operand (op, mode))
3962 /* Registers and immediate operands are always "aligned". */
3963 if (GET_CODE (op) != MEM)
3966 /* Don't even try to do any aligned optimizations with volatiles. */
3967 if (MEM_VOLATILE_P (op))
3972 /* Pushes and pops are only valid on the stack pointer. */
3973 if (GET_CODE (op) == PRE_DEC
3974 || GET_CODE (op) == POST_INC)
3977 /* Decode the address. */
3978 if (! ix86_decompose_address (op, &parts))
3981 if (parts.base && GET_CODE (parts.base) == SUBREG)
3982 parts.base = SUBREG_REG (parts.base);
3983 if (parts.index && GET_CODE (parts.index) == SUBREG)
3984 parts.index = SUBREG_REG (parts.index);
3986 /* Look for some component that isn't known to be aligned. */
/* Index and base registers must carry at least 32-bit (4-byte) pointer
   alignment, and any displacement must be a multiple of 4.  */
3990 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3995 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4000 if (GET_CODE (parts.disp) != CONST_INT
4001 || (INTVAL (parts.disp) & 3) != 0)
4005 /* Didn't find one -- this must be an aligned address. */
4009 /* Return true if the constant is something that can be loaded with
4010 a special instruction. Only handle 0.0 and 1.0; others are less
4014 standard_80387_constant_p (x)
4017 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4019 /* Note that the 80387 has other constants, such as pi, that we should
4020 support too. On some machines, these are much slower to load as standard
4021 constant, than to load from doubles in memory. */
4022 if (x == CONST0_RTX (GET_MODE (x)))
4024 if (x == CONST1_RTX (GET_MODE (x)))
4029 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4032 standard_sse_constant_p (x)
4035 if (x == const0_rtx)
4037 return (x == CONST0_RTX (GET_MODE (x)));
4040 /* Returns 1 if OP contains a symbol reference */
4043 symbolic_reference_mentioned_p (op)
4046 register const char *fmt;
4049 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Recurse over all 'e' (expression) and 'E' (vector) operands of OP.  */
4052 fmt = GET_RTX_FORMAT (GET_CODE (op));
4053 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4059 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4060 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4064 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4071 /* Return 1 if it is appropriate to emit `ret' instructions in the
4072 body of a function. Do this only if the epilogue is simple, needing a
4073 couple of insns. Prior to reloading, we can't tell how many registers
4074 must be saved, so return 0 then. Return 0 if there is no frame
4075 marker to de-allocate.
4077 If NON_SAVING_SETJMP is defined and true, then it is not possible
4078 for the epilogue to be simple, so return 0. This is a special case
4079 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4080 until final, but jump_optimize may need to know sooner if a
4084 ix86_can_use_return_insn_p ()
4086 struct ix86_frame frame;
4088 #ifdef NON_SAVING_SETJMP
4089 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4093 if (! reload_completed || frame_pointer_needed)
4096 /* Don't allow more than 32 pop, since that's all we can do
4097 with one instruction. */
4098 if (current_function_pops_args
4099 && current_function_args_size >= 32768)
4102 ix86_compute_frame_layout (&frame);
4103 return frame.to_allocate == 0 && frame.nregs == 0;
4106 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4108 x86_64_sign_extended_value (value)
4111 switch (GET_CODE (value))
4113 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4114 to be at least 32 and this all acceptable constants are
4115 represented as CONST_INT. */
4117 if (HOST_BITS_PER_WIDE_INT == 32)
/* Otherwise: the CONST_INT matches iff it survives a DImode -> SImode
   sign-extension round trip.  */
4121 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4122 return trunc_int_for_mode (val, SImode) == val;
4126 /* For certain code models, the symbolic references are known to fit.
4127 in CM_SMALL_PIC model we know it fits if it is local to the shared
4128 library. Don't count TLS SYMBOL_REFs here, since they should fit
4129 only if inside of UNSPEC handled below. */
4131 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4133 /* For certain code models, the code is near as well. */
4135 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4136 || ix86_cmodel == CM_KERNEL);
4138 /* We also may accept the offsetted memory references in certain special
4141 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4142 switch (XINT (XEXP (value, 0), 1))
4144 case UNSPEC_GOTPCREL:
4146 case UNSPEC_GOTNTPOFF:
4152 if (GET_CODE (XEXP (value, 0)) == PLUS)
4154 rtx op1 = XEXP (XEXP (value, 0), 0);
4155 rtx op2 = XEXP (XEXP (value, 0), 1);
4156 HOST_WIDE_INT offset;
4158 if (ix86_cmodel == CM_LARGE)
4160 if (GET_CODE (op2) != CONST_INT)
4162 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4163 switch (GET_CODE (op1))
4166 /* For CM_SMALL assume that latest object is 16MB before
4167 end of 31bits boundary. We may also accept pretty
4168 large negative constants knowing that all objects are
4169 in the positive half of address space. */
4170 if (ix86_cmodel == CM_SMALL
4171 && offset < 16*1024*1024
4172 && trunc_int_for_mode (offset, SImode) == offset)
4174 /* For CM_KERNEL we know that all objects reside in the
4175 negative half of 32bits address space. We may not
4176 accept negative offsets, since they may be just off
4177 and we may accept pretty large positive ones. */
4178 if (ix86_cmodel == CM_KERNEL
4180 && trunc_int_for_mode (offset, SImode) == offset)
4184 /* These conditions are similar to SYMBOL_REF ones, just the
4185 constraints for code models differ. */
4186 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4187 && offset < 16*1024*1024
4188 && trunc_int_for_mode (offset, SImode) == offset)
4190 if (ix86_cmodel == CM_KERNEL
4192 && trunc_int_for_mode (offset, SImode) == offset)
4196 switch (XINT (op1, 1))
4201 && trunc_int_for_mode (offset, SImode) == offset)
4215 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4217 x86_64_zero_extended_value (value)
4220 switch (GET_CODE (value))
4223 if (HOST_BITS_PER_WIDE_INT == 32)
4224 return (GET_MODE (value) == VOIDmode
4225 && !CONST_DOUBLE_HIGH (value));
4229 if (HOST_BITS_PER_WIDE_INT == 32)
4230 return INTVAL (value) >= 0;
/* 64-bit host: the value must fit in the low 32 bits.  */
4232 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4235 /* For certain code models, the symbolic references are known to fit. */
4237 return ix86_cmodel == CM_SMALL;
4239 /* For certain code models, the code is near as well. */
4241 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4243 /* We also may accept the offsetted memory references in certain special
4246 if (GET_CODE (XEXP (value, 0)) == PLUS)
4248 rtx op1 = XEXP (XEXP (value, 0), 0);
4249 rtx op2 = XEXP (XEXP (value, 0), 1);
4251 if (ix86_cmodel == CM_LARGE)
4253 switch (GET_CODE (op1))
4257 /* For small code model we may accept pretty large positive
4258 offsets, since one bit is available for free. Negative
4259 offsets are limited by the size of NULL pointer area
4260 specified by the ABI. */
4261 if (ix86_cmodel == CM_SMALL
4262 && GET_CODE (op2) == CONST_INT
4263 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4264 && (trunc_int_for_mode (INTVAL (op2), SImode)
4267 /* ??? For the kernel, we may accept adjustment of
4268 -0x10000000, since we know that it will just convert
4269 negative address space to positive, but perhaps this
4270 is not worthwhile. */
4273 /* These conditions are similar to SYMBOL_REF ones, just the
4274 constraints for code models differ. */
4275 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4276 && GET_CODE (op2) == CONST_INT
4277 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4278 && (trunc_int_for_mode (INTVAL (op2), SImode)
4292 /* Value should be nonzero if functions must have frame pointers.
4293 Zero means the frame pointer need not be set up (and parms may
4294 be accessed via the stack pointer) in functions that seem suitable. */
4297 ix86_frame_pointer_required ()
4299 /* If we accessed previous frames, then the generated code expects
4300 to be able to access the saved ebp value in our frame. */
4301 if (cfun->machine->accesses_prev_frame)
4304 /* Several x86 os'es need a frame pointer for other reasons,
4305 usually pertaining to setjmp. */
4306 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4309 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4310 the frame pointer by default. Turn it back on now if we've not
4311 got a leaf function. */
4312 if (TARGET_OMIT_LEAF_FRAME_POINTER
4313 && (!current_function_is_leaf))
4316 if (current_function_profile)
4322 /* Record that the current function accesses previous call frames. */
4325 ix86_setup_frame_addresses ()
4327 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: emit pc thunks as hidden link-once functions when
   the assembler/linker support it; otherwise use local labels.  */
4330 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4331 # define USE_HIDDEN_LINKONCE 1
4333 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a pc thunk label has been emitted.  */
4336 static int pic_labels_used;
4338 /* Fills in the label name that should be used for a pc thunk for
4339 the given register. */
4342 get_pc_thunk_name (name, regno)
4346 if (USE_HIDDEN_LINKONCE)
4347 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4349 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4353 /* This function generates code for -fpic that loads %ebx with
4354 the return address of the caller and then returns. */
4357 ix86_asm_file_end (file)
/* Emit one thunk per register recorded in pic_labels_used.  */
4363 for (regno = 0; regno < 8; ++regno)
4367 if (! ((pic_labels_used >> regno) & 1))
4370 get_pc_thunk_name (name, regno);
4372 if (USE_HIDDEN_LINKONCE)
4376 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4378 TREE_PUBLIC (decl) = 1;
4379 TREE_STATIC (decl) = 1;
4380 DECL_ONE_ONLY (decl) = 1;
4382 (*targetm.asm_out.unique_section) (decl, 0);
4383 named_section (decl, NULL, 0);
4385 (*targetm.asm_out.globalize_label) (file, name);
4386 fputs ("\t.hidden\t", file);
4387 assemble_name (file, name);
4389 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4394 ASM_OUTPUT_LABEL (file, name);
/* Thunk body: load the return address (at top of stack) and return.  */
4397 xops[0] = gen_rtx_REG (SImode, regno);
4398 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4399 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4400 output_asm_insn ("ret", xops);
4404 /* Emit code for the SET_GOT patterns. */
4407 output_set_got (dest)
4413 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or without PIC), use the classic
   call/pop sequence to obtain the pc.  */
4415 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4417 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4420 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4422 output_asm_insn ("call\t%a2", xops);
4425 /* Output the "canonical" label name ("Lxx$pb") here too. This
4426 is what will be referred to by the Mach-O PIC subsystem. */
4427 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4429 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4430 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4433 output_asm_insn ("pop{l}\t%0", xops);
/* Otherwise call the per-register pc thunk and record that it must be
   emitted at end of file.  */
4438 get_pc_thunk_name (name, REGNO (dest));
4439 pic_labels_used |= 1 << REGNO (dest);
4441 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4442 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4443 output_asm_insn ("call\t%X2", xops);
4446 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4447 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4448 else if (!TARGET_MACHO)
4449 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4454 /* Generate an "push" pattern for input ARG. */
4460 return gen_rtx_SET (VOIDmode,
4462 gen_rtx_PRE_DEC (Pmode,
4463 stack_pointer_rtx)),
4467 /* Return >= 0 if there is an unused call-clobbered register available
4468 for the entire function. */
4471 ix86_select_alt_pic_regnum ()
4473 if (current_function_is_leaf && !current_function_profile)
/* Scan eax/edx/ecx (regnos 2..0) for one never used in this function.  */
4476 for (i = 2; i >= 0; --i)
4477 if (!regs_ever_live[i])
4481 return INVALID_REGNUM;
4484 /* Return 1 if we need to save REGNO. */
4486 ix86_save_reg (regno, maybe_eh_return)
4488 int maybe_eh_return;
/* The PIC register is saved specially: only when actually live, and only
   when no alternate call-clobbered register can hold it.  */
4490 if (pic_offset_table_rtx
4491 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4492 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4493 || current_function_profile
4494 || current_function_calls_eh_return))
4496 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4501 if (current_function_calls_eh_return && maybe_eh_return)
4506 unsigned test = EH_RETURN_DATA_REGNO (i);
4507 if (test == INVALID_REGNUM)
4514 return (regs_ever_live[regno]
4515 && !call_used_regs[regno]
4516 && !fixed_regs[regno]
4517 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4520 /* Return number of registers to be saved on the stack. */
4528 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4529 if (ix86_save_reg (regno, true))
4534 /* Return the offset between two registers, one to be eliminated, and the other
4535 its replacement, at the start of a routine. */
4538 ix86_initial_elimination_offset (from, to)
4542 struct ix86_frame frame;
4543 ix86_compute_frame_layout (&frame);
4545 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4546 return frame.hard_frame_pointer_offset;
4547 else if (from == FRAME_POINTER_REGNUM
4548 && to == HARD_FRAME_POINTER_REGNUM)
4549 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4552 if (to != STACK_POINTER_REGNUM)
4554 else if (from == ARG_POINTER_REGNUM)
4555 return frame.stack_pointer_offset;
4556 else if (from != FRAME_POINTER_REGNUM)
4559 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4563 /* Fill structure ix86_frame about frame of currently computed function. */
4566 ix86_compute_frame_layout (frame)
4567 struct ix86_frame *frame;
4569 HOST_WIDE_INT total_size;
4570 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4572 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4573 HOST_WIDE_INT size = get_frame_size ();
4575 frame->nregs = ix86_nsaved_regs ();
4578 /* Skip return address and saved base pointer. */
4579 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4581 frame->hard_frame_pointer_offset = offset;
4583 /* Do some sanity checking of stack_alignment_needed and
4584 preferred_alignment, since i386 port is the only using those features
4585 that may break easily. */
4587 if (size && !stack_alignment_needed)
4589 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4591 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4593 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4596 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4597 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4599 /* Register save area */
4600 offset += frame->nregs * UNITS_PER_WORD;
/* Va-arg save area (64-bit varargs register spill), if used.  */
4603 if (ix86_save_varrargs_registers)
4605 offset += X86_64_VARARGS_SIZE;
4606 frame->va_arg_size = X86_64_VARARGS_SIZE;
4609 frame->va_arg_size = 0;
4611 /* Align start of frame for local function. */
4612 frame->padding1 = ((offset + stack_alignment_needed - 1)
4613 & -stack_alignment_needed) - offset;
4615 offset += frame->padding1;
4617 /* Frame pointer points here. */
4618 frame->frame_pointer_offset = offset;
4622 /* Add outgoing arguments area. Can be skipped if we eliminated
4623 all the function calls as dead code. */
4624 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4626 offset += current_function_outgoing_args_size;
4627 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4630 frame->outgoing_arguments_size = 0;
4632 /* Align stack boundary. Only needed if we're calling another function
4634 if (!current_function_is_leaf || current_function_calls_alloca)
4635 frame->padding2 = ((offset + preferred_alignment - 1)
4636 & -preferred_alignment) - offset;
4638 frame->padding2 = 0;
4640 offset += frame->padding2;
4642 /* We've reached end of stack frame. */
4643 frame->stack_pointer_offset = offset;
4645 /* Size prologue needs to allocate. */
4646 frame->to_allocate =
4647 (size + frame->padding1 + frame->padding2
4648 + frame->outgoing_arguments_size + frame->va_arg_size);
/* x86-64 red zone: leaf functions with unchanging sp may use up to
   RED_ZONE_SIZE - RED_ZONE_RESERVE bytes below sp without allocating.  */
4650 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4651 && current_function_is_leaf)
4653 frame->red_zone_size = frame->to_allocate;
4654 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4655 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4658 frame->red_zone_size = 0;
4659 frame->to_allocate -= frame->red_zone_size;
4660 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard condition elided in this
   extract).  */
4662 fprintf (stderr, "nregs: %i\n", frame->nregs);
4663 fprintf (stderr, "size: %i\n", size);
4664 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4665 fprintf (stderr, "padding1: %i\n", frame->padding1);
4666 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4667 fprintf (stderr, "padding2: %i\n", frame->padding2);
4668 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4669 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4670 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4671 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4672 frame->hard_frame_pointer_offset);
4673 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4677 /* Emit code to save registers in the prologue. */
4680 ix86_emit_save_regs ()
/* Push each register that ix86_save_reg says must be saved, highest
   regno first; each push is marked frame-related for dwarf2 unwind info.  */
4685 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4686 if (ix86_save_reg (regno, true))
4688 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4689 RTX_FRAME_RELATED_P (insn) = 1;
4693 /* Emit code to save registers using MOV insns. First register
4694 is restored from POINTER + OFFSET. */
4696 ix86_emit_save_regs_using_mov (pointer, offset)
4698 HOST_WIDE_INT offset;
4703 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4704 if (ix86_save_reg (regno, true))
4706 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4708 gen_rtx_REG (Pmode, regno));
4709 RTX_FRAME_RELATED_P (insn) = 1;
4710 offset += UNITS_PER_WORD;
4714 /* Expand the prologue into a bunch of separate insns.  Chooses between
     push-based and mov-based register saves, allocates the stack frame
     (possibly via a probing _alloca call on stack-probe targets), and
     initializes the PIC register when it is live. */
4717 ix86_expand_prologue ()
4721   struct ix86_frame frame;
4723   HOST_WIDE_INT allocate;
4725   ix86_compute_frame_layout (&frame);
4728       int count = frame.nregs;
4730       /* The fast prologue uses move instead of push to save registers.  This
4731          is significantly longer, but also executes faster as modern hardware
4732          can execute the moves in parallel, but can't do that for push/pop.
4734 	 Be careful about choosing what prologue to emit:  When function takes
4735 	 many instructions to execute we may use slow version as well as in
4736 	 case function is known to be outside hot spot (this is known with
4737 	 feedback only).  Weight the size of function by number of registers
4738 	 to save as it is cheap to use one or two push instructions but very
4739 	 slow to use many of them.  */
4741 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4742       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4743 	  || (flag_branch_probabilities
4744 	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4745         use_fast_prologue_epilogue = 0;
4747         use_fast_prologue_epilogue = !expensive_function_p (count);
4748       if (TARGET_PROLOGUE_USING_MOVE)
4749         use_mov = use_fast_prologue_epilogue;
4752   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4753      slower on all targets.  Also sdb doesn't like it.  */
4755   if (frame_pointer_needed)
4757       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4758       RTX_FRAME_RELATED_P (insn) = 1;
4760       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4761       RTX_FRAME_RELATED_P (insn) = 1;
4764   allocate = frame.to_allocate;
4765   /* In case we are dealing only with single register and empty frame,
4766      push is equivalent of the mov+add sequence.  */
4767   if (allocate == 0 && frame.nregs <= 1)
4771       ix86_emit_save_regs ();
4773     allocate += frame.nregs * UNITS_PER_WORD;
4777   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4779       insn = emit_insn (gen_pro_epilogue_adjust_stack
4780 			(stack_pointer_rtx, stack_pointer_rtx,
4781 			 GEN_INT (-allocate)));
4782       RTX_FRAME_RELATED_P (insn) = 1;
4786       /* ??? Is this only valid for Win32?  */
     /* Large allocation on a stack-probe target: route it through the
        library _alloca, passing the size in %eax (SImode reg 0) and
        recording the use so the call isn't deleted. */
4793       arg0 = gen_rtx_REG (SImode, 0);
4794       emit_move_insn (arg0, GEN_INT (allocate));
4796       sym = gen_rtx_MEM (FUNCTION_MODE,
4797 			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4798       insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4800       CALL_INSN_FUNCTION_USAGE (insn)
4801 	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4802 			     CALL_INSN_FUNCTION_USAGE (insn));
     /* Mov-based saves: address them off %esp when no frame pointer (or
        nothing else is allocated), otherwise off %ebp at negative offsets. */
4806       if (!frame_pointer_needed || !frame.to_allocate)
4807         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4809         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4810 				       -frame.nregs * UNITS_PER_WORD);
4813 #ifdef SUBTARGET_PROLOGUE
4817   pic_reg_used = false;
4818   if (pic_offset_table_rtx
4819       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4820 	  || current_function_profile))
4822       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4824       if (alt_pic_reg_used != INVALID_REGNUM)
4825 	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4827       pic_reg_used = true;
4832       insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4834       /* Even with accurate pre-reload life analysis, we can wind up
4835 	 deleting all references to the pic register after reload.
4836 	 Consider if cross-jumping unifies two sides of a branch
4837 	 controlled by a comparison vs the only read from a global.
4838 	 In which case, allow the set_got to be deleted, though we're
4839 	 too late to do anything about the ebx save in the prologue.  */
4840       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4843   /* Prevent function calls from being scheduled before the call to mcount.
4844      In the pic_reg_used case, make sure that the got load isn't deleted.  */
4845   if (current_function_profile)
4846     emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4849 /* Emit code to restore saved registers using MOV insns.  First register
4850    is restored from POINTER + OFFSET.  MAYBE_EH_RETURN selects whether
     the eh_return scratch registers count as saved (it is forwarded to
     ix86_save_reg). */
4852 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4855      int maybe_eh_return;
4859   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4860     if (ix86_save_reg (regno, maybe_eh_return))
4862 	emit_move_insn (gen_rtx_REG (Pmode, regno),
4863 			adjust_address (gen_rtx_MEM (Pmode, pointer),
4865 	offset += UNITS_PER_WORD;
4869 /* Restore function stack, frame, and registers.  STYLE appears to
     distinguish normal returns, sibcall epilogues, and eh_return
     (style == 2) — TODO(review): confirm the exact encoding against the
     callers, which are outside this excerpt. */
4872 ix86_expand_epilogue (style)
4876   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4877   struct ix86_frame frame;
4878   HOST_WIDE_INT offset;
4880   ix86_compute_frame_layout (&frame);
4882   /* Calculate start of saved registers relative to ebp.  Special care
4883      must be taken for the normal return case of a function using
4884      eh_return: the eax and edx registers are marked as saved, but not
4885      restored along this path.  */
4886   offset = frame.nregs;
4887   if (current_function_calls_eh_return && style != 2)
4889   offset *= -UNITS_PER_WORD;
4891   /* If we're only restoring one register and sp is not valid then
4892      using a move instruction to restore the register since it's
4893      less work than reloading sp and popping the register.
4895      The default code results in stack adjustment using add/lea instruction,
4896      while this code results in LEAVE instruction (or discrete equivalent),
4897      so it is profitable in some other cases as well.  Especially when there
4898      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
4899      and there is exactly one register to pop.  This heuristic may need some
4900      tuning in future.  */
4901   if ((!sp_valid && frame.nregs <= 1)
4902       || (TARGET_EPILOGUE_USING_MOVE
4903 	  && use_fast_prologue_epilogue
4904 	  && (frame.nregs > 1 || frame.to_allocate))
4905       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4906       || (frame_pointer_needed && TARGET_USE_LEAVE
4907 	  && use_fast_prologue_epilogue && frame.nregs == 1)
4908       || current_function_calls_eh_return)
4910       /* Restore registers.  We can use ebp or esp to address the memory
4911 	 locations.  If both are available, default to ebp, since offsets
4912 	 are known to be small.  Only exception is esp pointing directly to the
4913 	 end of block of saved registers, where we may simplify addressing
4916       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4917 	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4918 					  frame.to_allocate, style == 2);
4920 	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4921 					  offset, style == 2);
4923       /* eh_return epilogues need %ecx added to the stack pointer.  */
4926 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4928 	  if (frame_pointer_needed)
     /* With a frame pointer: recompute the stack-adjust value relative
        to %ebp (plus one word for the saved frame pointer itself),
        reload %ebp from memory, then move sp to the adjusted address. */
4930 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4931 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
4932 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4934 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4935 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
4937 	      emit_insn (gen_pro_epilogue_adjust_stack
4938 			 (stack_pointer_rtx, sa, const0_rtx));
4942 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4943 	      tmp = plus_constant (tmp, (frame.to_allocate
4944 					 + frame.nregs * UNITS_PER_WORD));
4945 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4948       else if (!frame_pointer_needed)
4949 	emit_insn (gen_pro_epilogue_adjust_stack
4950 		   (stack_pointer_rtx, stack_pointer_rtx,
4951 		    GEN_INT (frame.to_allocate
4952 			     + frame.nregs * UNITS_PER_WORD)));
4953       /* If not an i386, mov & pop is faster than "leave".  */
4954       else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4955 	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4958 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4959 						    hard_frame_pointer_rtx,
4962 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4964 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4969       /* First step is to deallocate the stack frame so that we can
4970 	 pop the registers.  */
4973       if (!frame_pointer_needed)
4975 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4976 						    hard_frame_pointer_rtx,
4979       else if (frame.to_allocate)
4980 	emit_insn (gen_pro_epilogue_adjust_stack
4981 		   (stack_pointer_rtx, stack_pointer_rtx,
4982 		    GEN_INT (frame.to_allocate)));
4984       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4985 	if (ix86_save_reg (regno, false))
4988 	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4990 	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4992       if (frame_pointer_needed)
4994 	  /* Leave results in shorter dependency chains on CPUs that are
4995 	     able to grok it fast.  */
4996 	  if (TARGET_USE_LEAVE)
4997 	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4998 	  else if (TARGET_64BIT)
4999 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5001 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5005   /* Sibcall epilogues don't want a return instruction.  */
5009   if (current_function_pops_args && current_function_args_size)
5011       rtx popc = GEN_INT (current_function_pops_args);
5013       /* i386 can only pop 64K bytes.  If asked to pop more, pop
5014 	 return address, do explicit add, and jump indirectly to the
5017       if (current_function_pops_args >= 65536)
5019 	  rtx ecx = gen_rtx_REG (SImode, 2);
5021 	  /* There is no "pascal" calling convention in 64bit ABI.  */
5025 	  emit_insn (gen_popsi1 (ecx));
5026 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5027 	  emit_jump_insn (gen_return_indirect_internal (ecx));
5030 	emit_jump_insn (gen_return_pop_internal (popc));
5033     emit_jump_insn (gen_return_internal ());
5036 /* Reset from the function's potential modifications.  The prologue may
     have renumbered pic_offset_table_rtx to an alternate register (see
     ix86_select_alt_pic_regnum use in ix86_expand_prologue); restore the
     canonical PIC register number for the next function. */
5039 ix86_output_function_epilogue (file, size)
5040      FILE *file ATTRIBUTE_UNUSED;
5041      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5043   if (pic_offset_table_rtx)
5044     REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5047 /* Extract the parts of an RTL expression that is a valid memory address
5048    for an instruction.  Return 0 if the structure of the address is
5049    grossly off.  Return -1 if the address contains ASHIFT, so it is not
5050    strictly valid, but still used for computing length of lea instruction.
     The decomposed base/index/disp/scale are written into *OUT. */
5054 ix86_decompose_address (addr, out)
5056      struct ix86_address *out;
5058   rtx base = NULL_RTX;
5059   rtx index = NULL_RTX;
5060   rtx disp = NULL_RTX;
5061   HOST_WIDE_INT scale = 1;
5062   rtx scale_rtx = NULL_RTX;
5065   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5067   else if (GET_CODE (addr) == PLUS)
5069       rtx op0 = XEXP (addr, 0);
5070       rtx op1 = XEXP (addr, 1);
5071       enum rtx_code code0 = GET_CODE (op0);
5072       enum rtx_code code1 = GET_CODE (op1);
5074       if (code0 == REG || code0 == SUBREG)
5076 	  if (code1 == REG || code1 == SUBREG)
5077 	    index = op0, base = op1;	/* index + base */
5079 	    base = op0, disp = op1;	/* base + displacement */
5081       else if (code0 == MULT)
5083 	  index = XEXP (op0, 0);
5084 	  scale_rtx = XEXP (op0, 1);
5085 	  if (code1 == REG || code1 == SUBREG)
5086 	    base = op1;		/* index*scale + base */
5088 	    disp = op1;		/* index*scale + disp */
5090       else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5092 	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
5093 	  scale_rtx = XEXP (XEXP (op0, 0), 1);
5094 	  base = XEXP (op0, 1);
5097       else if (code0 == PLUS)
5099 	  index = XEXP (op0, 0);	/* index + base + disp */
5100 	  base = XEXP (op0, 1);
5106   else if (GET_CODE (addr) == MULT)
5108       index = XEXP (addr, 0);		/* index*scale */
5109       scale_rtx = XEXP (addr, 1);
5111   else if (GET_CODE (addr) == ASHIFT)
5115       /* We're called for lea too, which implements ashift on occasion.  */
5116       index = XEXP (addr, 0);
5117       tmp = XEXP (addr, 1);
5118       if (GET_CODE (tmp) != CONST_INT)
5120       scale = INTVAL (tmp);
5121       if ((unsigned HOST_WIDE_INT) scale > 3)
5127     disp = addr;			/* displacement */
5129   /* Extract the integral value of scale.  */
5132       if (GET_CODE (scale_rtx) != CONST_INT)
5134       scale = INTVAL (scale_rtx);
5137   /* Allow arg pointer and stack pointer as index if there is no scaling */
5138   if (base && index && scale == 1
5139       && (index == arg_pointer_rtx || index == frame_pointer_rtx
5140           || index == stack_pointer_rtx))
5147   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5148   if ((base == hard_frame_pointer_rtx
5149        || base == frame_pointer_rtx
5150        || base == arg_pointer_rtx) && !disp)
5153   /* Special case: on K6, [%esi] makes the instruction vector decoded.
5154      Avoid this by transforming to [%esi+0].  */
5155   if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5156       && base && !index && !disp
5158       && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5161   /* Special case: encode reg+reg instead of reg*2.  */
5162   if (!base && index && scale && scale == 2)
5163     base = index, scale = 1;
5165   /* Special case: scaling cannot be encoded without base or displacement.  */
5166   if (!base && !disp && index && scale != 1)
5177 /* Return cost of the memory address x.
5178    For i386, it is better to use a complex address than let gcc copy
5179    the address into a reg and make a new pseudo.  But not if the address
5180    requires two regs - that would mean more pseudos with longer
     lifetimes.  Lower returned cost means the address is preferred. */
5183 ix86_address_cost (x)
5186   struct ix86_address parts;
5189   if (!ix86_decompose_address (x, &parts))
     /* Look through SUBREGs so the hard/pseudo register tests below see
        the underlying register. */
5192   if (parts.base && GET_CODE (parts.base) == SUBREG)
5193     parts.base = SUBREG_REG (parts.base);
5194   if (parts.index && GET_CODE (parts.index) == SUBREG)
5195     parts.index = SUBREG_REG (parts.index);
5197   /* More complex memory references are better.  */
5198   if (parts.disp && parts.disp != const0_rtx)
5201   /* Attempt to minimize number of registers in the address.  */
5203        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5205 	  && (!REG_P (parts.index)
5206 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5210       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5212       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5213       && parts.base != parts.index)
5216   /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5217      since its predecode logic can't detect the length of instructions
5218      and it degenerates to vector decoded.  Increase cost of such
5219      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
5220      to split such addresses or even refuse such addresses at all.
5222      Following addressing modes are affected:
5227      The first and last case may be avoidable by explicitly coding the zero in
5228      memory address, but I don't have AMD-K6 machine handy to check this
5232       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5233 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5234 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5240 /* If X is a machine specific address (i.e. a symbol or label being
5241    referenced as a displacement from the GOT implemented using an
5242    UNSPEC), then return the base term.  Otherwise return X.
     Two patterns are recognized: a CONST-wrapped UNSPEC_GOTPCREL
     (64-bit RIP-relative form) and pic_offset_table_rtx + CONST
     UNSPEC_GOTOFF (32-bit PIC form). */
5245 ix86_find_base_term (x)
5252       if (GET_CODE (x) != CONST)
5255       if (GET_CODE (term) == PLUS
5256 	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5257 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5258 	term = XEXP (term, 0);
5259       if (GET_CODE (term) != UNSPEC
5260 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5263       term = XVECEXP (term, 0, 0);
5265       if (GET_CODE (term) != SYMBOL_REF
5266 	  && GET_CODE (term) != LABEL_REF)
5272   if (GET_CODE (x) != PLUS
5273       || XEXP (x, 0) != pic_offset_table_rtx
5274       || GET_CODE (XEXP (x, 1)) != CONST)
5277   term = XEXP (XEXP (x, 1), 0);
     /* Strip an optional integer offset before testing for the
        UNSPEC_GOTOFF wrapper. */
5279   if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5280     term = XEXP (term, 0);
5282   if (GET_CODE (term) != UNSPEC
5283       || XINT (term, 1) != UNSPEC_GOTOFF)
5286   term = XVECEXP (term, 0, 0);
5288   if (GET_CODE (term) != SYMBOL_REF
5289       && GET_CODE (term) != LABEL_REF)
5295 /* Determine if a given RTX is a valid constant.  We already know this
5296    satisfies CONSTANT_P.  TLS symbol references (and offsets from them)
     are rejected; only a whitelisted set of UNSPECs is accepted inside
     a CONST. */
5299 legitimate_constant_p (x)
5304   switch (GET_CODE (x))
5307       /* TLS symbols are not constant.  */
5308       if (tls_symbolic_operand (x, Pmode))
5313       inner = XEXP (x, 0);
5315       /* Offsets of TLS symbols are never valid.
5316 	 Discourage CSE from creating them.  */
5317       if (GET_CODE (inner) == PLUS
5318 	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5321       /* Only some unspecs are valid as "constants".  */
5322       if (GET_CODE (inner) == UNSPEC)
5323 	switch (XINT (inner, 1))
5326 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5336   /* Otherwise we handle everything else in the move patterns.  */
5340 /* Determine if it's legal to put X into the constant pool.  This
5341    is not possible for the address of thread-local symbols, which
5342    is checked above.  Implemented simply as the negation of
     legitimate_constant_p. */
5345 ix86_cannot_force_const_mem (x)
5348   return !legitimate_constant_p (x);
5351 /* Determine if a given RTX is a valid constant address.  Non-PIC
     constants are delegated to legitimate_constant_p; some codes are
     accepted only in 64-bit mode. */
5354 constant_address_p (x)
5357   switch (GET_CODE (x))
5364       return TARGET_64BIT;
5367       /* For Mach-O, really believe the CONST.  */
5370       /* Otherwise fall through.  */
5372       return !flag_pic && legitimate_constant_p (x);
5379 /* Nonzero if the constant value X is a legitimate general operand
5380    when generating PIC code.  It is given that flag_pic is on and
5381    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  Symbolic
     operands fall through to legitimate_pic_address_disp_p. */
5384 legitimate_pic_operand_p (x)
5389   switch (GET_CODE (x))
5392       inner = XEXP (x, 0);
5394       /* Only some unspecs are valid as "constants".  */
5395       if (GET_CODE (inner) == UNSPEC)
5396 	switch (XINT (inner, 1))
5399 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5407       return legitimate_pic_address_disp_p (x);
5414 /* Determine if a given CONST RTX is a valid memory displacement
     in PIC mode.  Accepts direct local symbol/label references in
     64-bit small-PIC mode, Mach-O picbase differences, and a
     whitelisted set of GOT/TLS UNSPEC wrappers. */
5418 legitimate_pic_address_disp_p (disp)
5423   /* In 64bit mode we can allow direct addresses of symbols and labels
5424      when they are not dynamic symbols.  */
5427       /* TLS references should always be enclosed in UNSPEC.  */
5428       if (tls_symbolic_operand (disp, GET_MODE (disp)))
5430       if (GET_CODE (disp) == SYMBOL_REF
5431 	  && ix86_cmodel == CM_SMALL_PIC
5432 	  && (CONSTANT_POOL_ADDRESS_P (disp)
5433 	      || SYMBOL_REF_FLAG (disp)))
5435       if (GET_CODE (disp) == LABEL_REF)
     /* symbol+offset is OK in small PIC when the offset stays within
        +/- 16MB, keeping the whole address in signed-32-bit range. */
5437       if (GET_CODE (disp) == CONST
5438 	  && GET_CODE (XEXP (disp, 0)) == PLUS
5439 	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5440 	       && ix86_cmodel == CM_SMALL_PIC
5441 	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5442 		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5443 	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5444 	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5445 	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5446 	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5449   if (GET_CODE (disp) != CONST)
5451   disp = XEXP (disp, 0);
5455       /* It is unsafe to allow PLUS expressions.  This limit allowed distance
5456 	 of GOT tables.  We should not need these anyway.  */
5457       if (GET_CODE (disp) != UNSPEC
5458 	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
5461       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5462 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5468   if (GET_CODE (disp) == PLUS)
5470       if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5472       disp = XEXP (disp, 0);
5476   /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5477   if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5479       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5480           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5481 	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5483 	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
5484 	    if (strstr (sym_name, "$pb") != 0)
5489   if (GET_CODE (disp) != UNSPEC)
5492   switch (XINT (disp, 1))
5497       return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5499       return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5500     case UNSPEC_GOTTPOFF:
5501     case UNSPEC_GOTNTPOFF:
5502     case UNSPEC_INDNTPOFF:
5505       return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5507       return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5509       return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5515 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5516    memory address for an instruction.  The MODE argument is the machine mode
5517    for the MEM expression that wants to use this address.
5519    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
5520    convert common non-canonical forms to canonical form so that they will
     be recognized.  Decomposes ADDR with ix86_decompose_address, then
     validates base, index, scale and displacement in turn; on failure
     jumps to the shared error-reporting tail with a reason string. */
5524 legitimate_address_p (mode, addr, strict)
5525      enum machine_mode mode;
5529   struct ix86_address parts;
5530   rtx base, index, disp;
5531   HOST_WIDE_INT scale;
5532   const char *reason = NULL;
5533   rtx reason_rtx = NULL_RTX;
5535   if (TARGET_DEBUG_ADDR)
5538 	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5539 	       GET_MODE_NAME (mode), strict);
5543   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5545       if (TARGET_DEBUG_ADDR)
5546 	fprintf (stderr, "Success.\n");
5550   if (ix86_decompose_address (addr, &parts) <= 0)
5552       reason = "decomposition failed";
5557   index = parts.index;
5559   scale = parts.scale;
5561   /* Validate base register.
5563      Don't allow SUBREG's here, it can lead to spill failures when the base
5564      is one word out of a two word structure, which is represented internally
5572       if (GET_CODE (base) == SUBREG)
5573 	reg = SUBREG_REG (base);
5577       if (GET_CODE (reg) != REG)
5579 	  reason = "base is not a register";
5583       if (GET_MODE (base) != Pmode)
5585 	  reason = "base is not in Pmode";
5589       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5590 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5592 	  reason = "base is not valid";
5597   /* Validate index register.
5599      Don't allow SUBREG's here, it can lead to spill failures when the index
5600      is one word out of a two word structure, which is represented internally
5608       if (GET_CODE (index) == SUBREG)
5609 	reg = SUBREG_REG (index);
5613       if (GET_CODE (reg) != REG)
5615 	  reason = "index is not a register";
5619       if (GET_MODE (index) != Pmode)
5621 	  reason = "index is not in Pmode";
5625       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5626 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5628 	  reason = "index is not valid";
5633   /* Validate scale factor.  */
5636       reason_rtx = GEN_INT (scale);
5639 	  reason = "scale without index";
5643       if (scale != 2 && scale != 4 && scale != 8)
5645 	  reason = "scale is not a valid multiplier";
5650   /* Validate displacement.  */
5655       if (GET_CODE (disp) == CONST
5656 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5657 	switch (XINT (XEXP (disp, 0), 1))
5661 	  case UNSPEC_GOTPCREL:
5664 	    goto is_legitimate_pic;
5666 	  case UNSPEC_GOTTPOFF:
5667 	  case UNSPEC_GOTNTPOFF:
5668 	  case UNSPEC_INDNTPOFF:
5674 	    reason = "invalid address unspec";
5678       else if (flag_pic && (SYMBOLIC_CONST (disp)
5680 			    && !machopic_operand_p (disp)
5685 	  if (TARGET_64BIT && (index || base))
5687 	      /* foo@dtpoff(%rX) is ok.  */
5688 	      if (GET_CODE (disp) != CONST
5689 		  || GET_CODE (XEXP (disp, 0)) != PLUS
5690 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5691 		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5692 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5693 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5695 		  reason = "non-constant pic memory reference";
5699 	  else if (! legitimate_pic_address_disp_p (disp))
5701 	      reason = "displacement is an invalid pic construct";
5705 	  /* This code used to verify that a symbolic pic displacement
5706 	     includes the pic_offset_table_rtx register.
5708 	     While this is good idea, unfortunately these constructs may
5709 	     be created by "adds using lea" optimization for incorrect
5718 	     This code is nonsensical, but results in addressing
5719 	     GOT table with pic_offset_table_rtx base.  We can't
5720 	     just refuse it easily, since it gets matched by
5721 	     "addsi3" pattern, that later gets split to lea in the
5722 	     case output register differs from input.  While this
5723 	     can be handled by separate addsi pattern for this case
5724 	     that never results in lea, this seems to be easier and
5725 	     correct fix for crash to disable this test.  */
5727       else if (!CONSTANT_ADDRESS_P (disp))
5729 	  reason = "displacement is not constant";
5732       else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5734 	  reason = "displacement is out of range";
5737       else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5739 	  reason = "displacement is a const_double";
5744   /* Everything looks valid.  */
5745   if (TARGET_DEBUG_ADDR)
5746     fprintf (stderr, "Success.\n");
5750   if (TARGET_DEBUG_ADDR)
5752       fprintf (stderr, "Error: %s\n", reason);
5753       debug_rtx (reason_rtx);
5758 /* Return a unique alias set for the GOT.  Lazily allocated on first
     call and cached in a function-local static. */
5760 static HOST_WIDE_INT
5761 ix86_GOT_alias_set ()
5763   static HOST_WIDE_INT set = -1;
5765     set = new_alias_set ();
5769 /* Return a legitimate reference for ORIG (an address) using the
5770    register REG.  If REG is 0, a new pseudo is generated.
5772    There are two types of references that must be handled:
5774    1. Global data references must load the address from the GOT, via
5775       the PIC reg.  An insn is emitted to do this load, and the reg is
5778    2. Static data references, constant pool addresses, and code labels
5779       compute the address as an offset from the GOT, whose base is in
5780       the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
5781       differentiate them from global data objects.  The returned
5782       address is the PIC reg + an unspec constant.
5784    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5785    reg also appears in the address.  */
5788 legitimize_pic_address (orig, reg)
5798 	reg = gen_reg_rtx (Pmode);
5799       /* Use the generic Mach-O PIC machinery.  */
5800       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5803   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5805   else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5807       /* This symbol may be referenced via a displacement from the PIC
5808 	 base address (@GOTOFF).  */
5810       if (reload_in_progress)
5811 	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5812       new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5813       new = gen_rtx_CONST (Pmode, new);
5814       new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5818 	  emit_move_insn (reg, new);
5822   else if (GET_CODE (addr) == SYMBOL_REF)
     /* 64-bit: build a RIP-relative @GOTPCREL load from the GOT. */
5826 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5827 	  new = gen_rtx_CONST (Pmode, new);
5828 	  new = gen_rtx_MEM (Pmode, new);
5829 	  RTX_UNCHANGING_P (new) = 1;
5830 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5833 	    reg = gen_reg_rtx (Pmode);
5834 	  /* Use directly gen_movsi, otherwise the address is loaded
5835 	     into register for CSE.  We don't want to CSE these addresses,
5836 	     instead we CSE addresses from the GOT table, so skip this.  */
5837 	  emit_insn (gen_movsi (reg, new));
5842 	  /* This symbol must be referenced via a load from the
5843 	     Global Offset Table (@GOT).  */
5845 	  if (reload_in_progress)
5846 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5847 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5848 	  new = gen_rtx_CONST (Pmode, new);
5849 	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5850 	  new = gen_rtx_MEM (Pmode, new);
5851 	  RTX_UNCHANGING_P (new) = 1;
5852 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5855 	    reg = gen_reg_rtx (Pmode);
5856 	  emit_move_insn (reg, new);
5862       if (GET_CODE (addr) == CONST)
5864 	  addr = XEXP (addr, 0);
5866 	  /* We must match stuff we generate before.  Assume the only
5867 	     unspecs that can get here are ours.  Not that we could do
5868 	     anything with them anyway...  */
5869 	  if (GET_CODE (addr) == UNSPEC
5870 	      || (GET_CODE (addr) == PLUS
5871 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5873 	  if (GET_CODE (addr) != PLUS)
5876       if (GET_CODE (addr) == PLUS)
5878 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5880 	  /* Check first to see if this is a constant offset from a @GOTOFF
5881 	     symbol reference.  */
5882 	  if (local_symbolic_operand (op0, Pmode)
5883 	      && GET_CODE (op1) == CONST_INT)
5887 		  if (reload_in_progress)
5888 		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5889 		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5891 		  new = gen_rtx_PLUS (Pmode, new, op1);
5892 		  new = gen_rtx_CONST (Pmode, new);
5893 		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5897 		      emit_move_insn (reg, new);
     /* Offsets outside +/- 16MB cannot be folded into the relocation;
        force the constant part into a register instead. */
5903 		  if (INTVAL (op1) < -16*1024*1024
5904 		      || INTVAL (op1) >= 16*1024*1024)
5905 		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
     /* General PLUS: legitimize both operands recursively and re-fold
        any constant term. */
5910 	      base = legitimize_pic_address (XEXP (addr, 0), reg);
5911 	      new  = legitimize_pic_address (XEXP (addr, 1),
5912 					     base == reg ? NULL_RTX : reg);
5914 	      if (GET_CODE (new) == CONST_INT)
5915 		new = plus_constant (base, INTVAL (new));
5918 		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5920 		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5921 		      new = XEXP (new, 1);
5923 		  new = gen_rtx_PLUS (Pmode, base, new);
/* Record target-specific attributes on DECL's SYMBOL_REF: the
   binds-locally flag (for @GOTOFF addressing under PIC) and, for
   thread-local variables, a "%<model-char>" prefix on the symbol name
   that encodes the chosen TLS access model. */
5932 ix86_encode_section_info (decl, first)
5934      int first ATTRIBUTE_UNUSED;
5936   bool local_p = (*targetm.binds_local_p) (decl);
5939   rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5940   if (GET_CODE (rtl) != MEM)
5942   symbol = XEXP (rtl, 0);
5943   if (GET_CODE (symbol) != SYMBOL_REF)
5946   /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5947      symbol so that we may access it directly in the GOT.  */
5950     SYMBOL_REF_FLAG (symbol) = local_p;
5952   /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5953      "local dynamic", "initial exec" or "local exec" TLS models
5956   if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5958       const char *symbol_str;
5961       enum tls_model kind = decl_tls_model (decl);
5963       if (TARGET_64BIT && ! flag_pic)
5965 	  /* x86-64 doesn't allow non-pic code for shared libraries,
5966 	     so don't generate GD/LD TLS models for non-pic code.  */
5969 	    case TLS_MODEL_GLOBAL_DYNAMIC:
5970 	      kind = TLS_MODEL_INITIAL_EXEC; break;
5971 	    case TLS_MODEL_LOCAL_DYNAMIC:
5972 	      kind = TLS_MODEL_LOCAL_EXEC; break;
5978       symbol_str = XSTR (symbol, 0);
     /* Already encoded with the correct model char?  Nothing to do. */
5980       if (symbol_str[0] == '%')
5982 	  if (symbol_str[1] == tls_model_chars[kind])
     /* Prepend "%<model-char>" to the symbol name in GC-managed storage. */
5986       len = strlen (symbol_str) + 1;
5987       newstr = alloca (len + 2);
5990       newstr[1] = tls_model_chars[kind];
5991       memcpy (newstr + 2, symbol_str, len);
5993       XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5997 /* Undo the above when printing symbol names: strip the TLS model
     prefix added by ix86_encode_section_info.  NOTE(review): the body
     of this function is absent from this listing excerpt. */
6000 ix86_strip_name_encoding (str)
6010 /* Load the thread pointer into a register.  Builds a MEM wrapping an
     UNSPEC_TP, marks it unchanging, gives it the GOT alias set, and
     forces the value into a fresh register. */
6013 get_thread_pointer ()
6017   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6018   tp = gen_rtx_MEM (Pmode, tp);
6019   RTX_UNCHANGING_P (tp) = 1;
6020   set_mem_alias_set (tp, ix86_GOT_alias_set ());
6021   tp = force_reg (Pmode, tp);
6026 /* Try machine-dependent ways of modifying an illegitimate address
6027 to be legitimate. If we find one, return the new, valid address.
6028 This macro is used in only one place: `memory_address' in explow.c.
6030 OLDX is the address as it was before break_out_memory_refs was called.
6031 In some cases it is useful to look at this to decide what needs to be done.
6033 MODE and WIN are passed so that this macro can use
6034 GO_IF_LEGITIMATE_ADDRESS.
6036 It is always safe for this macro to do nothing. It exists to recognize
6037 opportunities to optimize the output.
6039 For the 80386, we handle X+REG by loading X into a register R and
6040 using R+REG. R will go in a general reg and indexing will be used.
6041 However, if REG is a broken-out memory address or multiplication,
6042 nothing needs to be done because REG can certainly go in a general reg.
6044 When -fpic is used, special handling is needed for symbolic references.
6045 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): this listing is a sampled extract of i386.c — the leading
   numbers are original line numbers and many lines (braces, declarations,
   switch headers, breaks) are missing.  Only comments are added here.
   Purpose (per the original header comment above): try to rewrite address X
   into a legitimate form for MODE; returning X unchanged is always safe.  */
6048 legitimize_address (x, oldx, mode)
6050      register rtx oldx ATTRIBUTE_UNUSED;
6051      enum machine_mode mode;
6056   if (TARGET_DEBUG_ADDR)
6058       fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6059 	       GET_MODE_NAME (mode));
/* TLS symbols need a model-specific access sequence; the switch header over
   the TLS model is one of the lines missing from this listing.  */
6063   log = tls_symbolic_operand (x, mode);
6066       rtx dest, base, off, pic;
6071 	case TLS_MODEL_GLOBAL_DYNAMIC:
6072 	  dest = gen_reg_rtx (Pmode);
6075 	      rtx rax = gen_rtx_REG (Pmode, 0), insns;
6078 	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6079 	      insns = get_insns ();
6082 	      emit_libcall_block (insns, dest, rax, x);
6085 	    emit_insn (gen_tls_global_dynamic_32 (dest, x));
6088 	case TLS_MODEL_LOCAL_DYNAMIC:
6089 	  base = gen_reg_rtx (Pmode);
6092 	      rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6095 	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6096 	      insns = get_insns ();
6099 	      note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6100 	      note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6101 	      emit_libcall_block (insns, base, rax, note);
6104 	    emit_insn (gen_tls_local_dynamic_base_32 (base));
/* Local-dynamic result is module base plus a @DTPOFF offset.  */
6106 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6107 	  off = gen_rtx_CONST (Pmode, off);
6109 	  return gen_rtx_PLUS (Pmode, base, off);
6111 	case TLS_MODEL_INITIAL_EXEC:
6115 	      type = UNSPEC_GOTNTPOFF;
6119 	      if (reload_in_progress)
6120 		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6121 	      pic = pic_offset_table_rtx;
6122 	      type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6124 	  else if (!TARGET_GNU_TLS)
6126 	      pic = gen_reg_rtx (Pmode);
6127 	      emit_insn (gen_set_got (pic));
6128 	      type = UNSPEC_GOTTPOFF;
6133 	      type = UNSPEC_INDNTPOFF;
6136 	  base = get_thread_pointer ();
/* Load the TP-relative offset from the GOT and combine with the thread
   pointer (add for GNU TLS/64-bit, subtract otherwise).  */
6138 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6139 	  off = gen_rtx_CONST (Pmode, off);
6141 	    off = gen_rtx_PLUS (Pmode, pic, off);
6142 	  off = gen_rtx_MEM (Pmode, off);
6143 	  RTX_UNCHANGING_P (off) = 1;
6144 	  set_mem_alias_set (off, ix86_GOT_alias_set ());
6145 	  dest = gen_reg_rtx (Pmode);
6147 	  if (TARGET_64BIT || TARGET_GNU_TLS)
6149 	      emit_move_insn (dest, off);
6150 	      return gen_rtx_PLUS (Pmode, base, dest);
6153 	    emit_insn (gen_subsi3 (dest, base, off));
6156 	case TLS_MODEL_LOCAL_EXEC:
6157 	  base = get_thread_pointer ();
6159 	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6160 				(TARGET_64BIT || TARGET_GNU_TLS)
6161 				? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6162 	  off = gen_rtx_CONST (Pmode, off);
6164 	  if (TARGET_64BIT || TARGET_GNU_TLS)
6165 	    return gen_rtx_PLUS (Pmode, base, off);
6168 	      dest = gen_reg_rtx (Pmode);
6169 	      emit_insn (gen_subsi3 (dest, base, off));
/* Non-TLS: PIC symbolic constants go through the PIC legitimizer.  */
6180   if (flag_pic && SYMBOLIC_CONST (x))
6181     return legitimize_pic_address (x, 0);
6183   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6184   if (GET_CODE (x) == ASHIFT
6185       && GET_CODE (XEXP (x, 1)) == CONST_INT
6186       && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6189       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6190 			GEN_INT (1 << log));
6193   if (GET_CODE (x) == PLUS)
6195       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6197       if (GET_CODE (XEXP (x, 0)) == ASHIFT
6198 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6199 	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6202 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6203 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6204 				      GEN_INT (1 << log));
6207       if (GET_CODE (XEXP (x, 1)) == ASHIFT
6208 	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6209 	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6212 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6213 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6214 				      GEN_INT (1 << log));
6217       /* Put multiply first if it isn't already.  */
6218       if (GET_CODE (XEXP (x, 1)) == MULT)
6220 	  rtx tmp = XEXP (x, 0);
6221 	  XEXP (x, 0) = XEXP (x, 1);
6226       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6227 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6228 	 created by virtual register instantiation, register elimination, and
6229 	 similar optimizations.  */
6230       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6233 	  x = gen_rtx_PLUS (Pmode,
6234 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6235 					  XEXP (XEXP (x, 1), 0)),
6236 			    XEXP (XEXP (x, 1), 1));
6240 	   (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6241 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6242       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6243 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6244 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6245 	       && CONSTANT_P (XEXP (x, 1)))
6248 	  rtx other = NULL_RTX;
6250 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6252 	      constant = XEXP (x, 1);
6253 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6255 	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6257 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6258 	      other = XEXP (x, 1);
6266 	      x = gen_rtx_PLUS (Pmode,
6267 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6268 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6269 				plus_constant (other, INTVAL (constant)));
/* After each canonicalization pass, stop as soon as the address is valid.  */
6273       if (changed && legitimate_address_p (mode, x, FALSE))
6276       if (GET_CODE (XEXP (x, 0)) == MULT)
6279 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6282       if (GET_CODE (XEXP (x, 1)) == MULT)
6285 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6289 	  && GET_CODE (XEXP (x, 1)) == REG
6290 	  && GET_CODE (XEXP (x, 0)) == REG)
6293       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6296 	  x = legitimize_pic_address (x, 0);
6299       if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a fresh register.  */
6302       if (GET_CODE (XEXP (x, 0)) == REG)
6304 	  register rtx temp = gen_reg_rtx (Pmode);
6305 	  register rtx val  = force_operand (XEXP (x, 1), temp);
6307 	    emit_move_insn (temp, val);
6313       else if (GET_CODE (XEXP (x, 1)) == REG)
6315 	  register rtx temp = gen_reg_rtx (Pmode);
6316 	  register rtx val  = force_operand (XEXP (x, 0), temp);
6318 	    emit_move_insn (temp, val);
6328 /* Print an integer constant expression in assembler syntax. Addition
6329 and subtraction are the only arithmetic that may appear in these
6330 expressions. FILE is the stdio stream to write to, X is the rtx, and
6331 CODE is the operand print code from the output string. */
/* NOTE(review): sampled listing — case labels and breaks of the GET_CODE
   switch are partly missing; comments only added.
   Prints constant expression X (PC/SYMBOL_REF/LABEL_REF/CONST_INT/
   CONST_DOUBLE/PLUS/MINUS/UNSPEC) in assembler syntax, appending the
   relocation suffix (@PLT, @GOT, @GOTOFF, TLS variants) where needed.  */
6334 output_pic_addr_const (file, x, code)
6341   switch (GET_CODE (x))
6351       assemble_name (file, XSTR (x, 0));
6352       if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6353 	fputs ("@PLT", file);
6360       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6361       assemble_name (asm_out_file, buf);
6365       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6369       /* This used to output parentheses around the expression,
6370 	 but that does not work on the 386 (either ATT or BSD assembler).  */
6371       output_pic_addr_const (file, XEXP (x, 0), code);
6375       if (GET_MODE (x) == VOIDmode)
6377 	  /* We can use %d if the number is <32 bits and positive.  */
6378 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6379 	    fprintf (file, "0x%lx%08lx",
6380 		     (unsigned long) CONST_DOUBLE_HIGH (x),
6381 		     (unsigned long) CONST_DOUBLE_LOW (x));
6383 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6386 	/* We can't handle floating point constants;
6387 	   PRINT_OPERAND must handle them.  */
6388 	output_operand_lossage ("floating constant misused");
6392       /* Some assemblers need integer constants to appear first.  */
6393       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6395 	  output_pic_addr_const (file, XEXP (x, 0), code);
6397 	  output_pic_addr_const (file, XEXP (x, 1), code);
6399       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6401 	  output_pic_addr_const (file, XEXP (x, 1), code);
6403 	  output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket style differs between Intel and AT&T dialects.  */
6411       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6412       output_pic_addr_const (file, XEXP (x, 0), code);
6414       output_pic_addr_const (file, XEXP (x, 1), code);
6416       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6420       if (XVECLEN (x, 0) != 1)
6422       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
/* Map the UNSPEC number onto its assembler relocation suffix.  */
6423       switch (XINT (x, 1))
6426 	  fputs ("@GOT", file);
6429 	  fputs ("@GOTOFF", file);
6431 	case UNSPEC_GOTPCREL:
6432 	  fputs ("@GOTPCREL(%rip)", file);
6434 	case UNSPEC_GOTTPOFF:
6435 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6436 	  fputs ("@GOTTPOFF", file);
6439 	  fputs ("@TPOFF", file);
6443 	    fputs ("@TPOFF", file);
6445 	    fputs ("@NTPOFF", file);
6448 	  fputs ("@DTPOFF", file);
6450 	case UNSPEC_GOTNTPOFF:
6452 	    fputs ("@GOTTPOFF(%rip)", file);
6454 	    fputs ("@GOTNTPOFF", file);
6456 	case UNSPEC_INDNTPOFF:
6457 	  fputs ("@INDNTPOFF", file);
6460 	  output_operand_lossage ("invalid UNSPEC as operand");
6466       output_operand_lossage ("invalid expression as operand");
6470 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6471 We need to handle our special PIC relocations. */
/* NOTE(review): sampled listing; comments only.  Emits a DWARF address
   constant: the size directive (quad for 64-bit, long otherwise), then X
   via the PIC printer when PIC relocations are involved — the `if (flag_pic)`
   style selector between 6486/6488 is among the missing lines.  */
6474 i386_dwarf_output_addr_const (file, x)
6479   fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6483   fprintf (file, "%s", ASM_LONG);
6486     output_pic_addr_const (file, x, '\0');
6488     output_addr_const (file, x);
6492 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6493 We need to emit DTP-relative relocations. */
/* NOTE(review): sampled listing; comments only.  Emits a DTP-relative
   (module-offset) relocation for debug info: a long directive, the symbol
   with an @DTPOFF suffix, and — presumably on the 8-byte path, the size
   switch is missing — a ", 0" upper half.  */
6496 i386_output_dwarf_dtprel (file, size, x)
6501       fputs (ASM_LONG, file);
6502       output_addr_const (file, x);
6503       fputs ("@DTPOFF", file);
6509       fputs (", 0", file);
6516 /* In the name of slightly smaller debug output, and to cater to
6517    general assembler lossage, recognize PIC+GOTOFF and turn it back
6518    into a direct symbol reference.  */
/* NOTE(review): sampled listing; comments only.  Per the original comment
   above: recognize PIC+GOTOFF / GOT / GOTPCREL address forms and turn them
   back into a direct symbol reference for smaller debug output.  */
6521 i386_simplify_dwarf_addr (orig_x)
6526   if (GET_CODE (x) == MEM)
/* 64-bit case: a MEM of (const (unspec GOTPCREL)) collapses to the symbol.  */
6531       if (GET_CODE (x) != CONST
6532 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6533 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6534 	  || GET_CODE (orig_x) != MEM)
6536       return XVECEXP (XEXP (x, 0), 0, 0);
6539   if (GET_CODE (x) != PLUS
6540       || GET_CODE (XEXP (x, 1)) != CONST)
6543   if (GET_CODE (XEXP (x, 0)) == REG
6544       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6545     /* %ebx + GOT/GOTOFF */
6547   else if (GET_CODE (XEXP (x, 0)) == PLUS)
6549       /* %ebx + %reg * scale + GOT/GOTOFF */
6551       if (GET_CODE (XEXP (y, 0)) == REG
6552 	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6554       else if (GET_CODE (XEXP (y, 1)) == REG
6555 	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6559       if (GET_CODE (y) != REG
6560 	  && GET_CODE (y) != MULT
6561 	  && GET_CODE (y) != ASHIFT)
/* Strip the CONST wrapper; a bare GOT/GOTOFF unspec resolves to its symbol
   (plus the leftover index term Y, if any).  */
6567   x = XEXP (XEXP (x, 1), 0);
6568   if (GET_CODE (x) == UNSPEC
6569       && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6570 	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6573 	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6574       return XVECEXP (x, 0, 0);
6577   if (GET_CODE (x) == PLUS
6578       && GET_CODE (XEXP (x, 0)) == UNSPEC
6579       && GET_CODE (XEXP (x, 1)) == CONST_INT
6580       && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6581 	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6582 	      && GET_CODE (orig_x) != MEM)))
6584       x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6586 	return gen_rtx_PLUS (Pmode, y, x);
/* NOTE(review): sampled listing — the switch over CODE and most suffix
   assignments are missing; comments only.  Writes the condition-code
   suffix ("a", "ae", "p", ...) for CODE/MODE to FILE; REVERSE inverts the
   condition, FP selects the fcmov-style spellings.  */
6594 put_condition_code (code, mode, reverse, fp, file)
6596      enum machine_mode mode;
6602   if (mode == CCFPmode || mode == CCFPUmode)
6604       enum rtx_code second_code, bypass_code;
6605       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6606       if (bypass_code != NIL || second_code != NIL)
6608       code = ix86_fp_compare_code_to_integer (code);
6612     code = reverse_condition (code);
6623       if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6628       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6629 	 Those same assemblers have the same but opposite lossage on cmov.  */
6632       suffix = fp ? "nbe" : "a";
6635       if (mode == CCNOmode || mode == CCGOCmode)
6637       else if (mode == CCmode || mode == CCGCmode)
6648       if (mode == CCNOmode || mode == CCGOCmode)
6650       else if (mode == CCmode || mode == CCGCmode)
6659       suffix = fp ? "nb" : "ae";
6662       if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6672       suffix = fp ? "u" : "p";
6675       suffix = fp ? "nu" : "np";
6680   fputs (suffix, file);
/* NOTE(review): sampled listing; comments only.  Prints the assembler name
   of register X to FILE; CODE is a size-override letter ('b','w','k','q',
   'y','h') and is replaced by the mode size when absent.  */
6684 print_reg (x, code, file)
6689   if (REGNO (x) == ARG_POINTER_REGNUM
6690       || REGNO (x) == FRAME_POINTER_REGNUM
6691       || REGNO (x) == FLAGS_REG
6692       || REGNO (x) == FPSR_REG)
6695   if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the override letter into a byte size (assignments sampled out);
   otherwise fall back to the operand's own mode size.  */
6698   if (code == 'w' || MMX_REG_P (x))
6700   else if (code == 'b')
6702   else if (code == 'k')
6704   else if (code == 'q')
6706   else if (code == 'y')
6708   else if (code == 'h')
6711     code = GET_MODE_SIZE (GET_MODE (x));
6713   /* Irritatingly, AMD extended registers use different naming convention
6714      from the normal registers.  */
6715   if (REX_INT_REG_P (x))
6722 	  error ("extended registers have no high halves");
6725 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6728 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6731 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6734 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6737 	  error ("unsupported operand size for extended register");
6745       if (STACK_TOP_P (x))
6747 	  fputs ("st(0)", file);
/* Classic registers: 'e'/'r' prefix for 4/8-byte integer regs, then the
   name from the hi/qi/qi_high tables indexed by register number.  */
6754       if (! ANY_FP_REG_P (x))
6755 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6759       fputs (hi_reg_name[REGNO (x)], file);
6762       fputs (qi_reg_name[REGNO (x)], file);
6765       fputs (qi_high_reg_name[REGNO (x)], file);
6772 /* Locate some local-dynamic symbol still in use by this function
6773 so that we can print its name in some tls_local_dynamic_base
/* NOTE(review): sampled listing; comments only.  Returns (and caches in
   cfun->machine->some_ld_name) the name of some local-dynamic TLS symbol
   used in the current function, scanning insn patterns via for_each_rtx.  */
6777 get_some_local_dynamic_name ()
6781   if (cfun->machine->some_ld_name)
6782     return cfun->machine->some_ld_name;
6784   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6786 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6787       return cfun->machine->some_ld_name;
/* NOTE(review): sampled listing; comments only.  for_each_rtx callback for
   get_some_local_dynamic_name: on the first local-dynamic SYMBOL_REF found,
   record its name in cfun->machine->some_ld_name (and presumably return
   nonzero to stop the walk — the return lines are sampled out).  */
6793 get_some_local_dynamic_name_1 (px, data)
6795      void *data ATTRIBUTE_UNUSED;
6799   if (GET_CODE (x) == SYMBOL_REF
6800       && local_dynamic_symbolic_operand (x, Pmode))
6802       cfun->machine->some_ld_name = XSTR (x, 0);
6810 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6811 C -- print opcode suffix for set/cmov insn.
6812 c -- like C, but print reversed condition
6813 F,f -- likewise, but for floating-point.
6814 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6816 R -- print the prefix for register names.
6817 z -- print the opcode suffix for the size of the current operand.
6818 * -- print a star (in certain assembler syntax)
6819 A -- print an absolute memory reference.
6820 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6821 s -- print a shift double count, followed by the assemblers argument
6823 b -- print the QImode name of the register for the indicated operand.
6824 %b0 would print %al if operands[0] is reg 0.
6825 w -- likewise, print the HImode name of the register.
6826 k -- likewise, print the SImode name of the register.
6827 q -- likewise, print the DImode name of the register.
6828 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6829 y -- print "st(0)" instead of "st" as a register.
6830 D -- print condition for SSE cmp instruction.
6831 P -- if PIC, print an @PLT suffix.
6832 X -- don't print any sort of PIC '@' suffix for a symbol.
6833 & -- print some in-use local-dynamic symbol name.
/* NOTE(review): sampled listing — the switch over CODE and many case labels
   and breaks are missing; comments only.  Prints operand X to FILE using
   the operand-code letters documented in the original comment above.  */
6837 print_operand (file, x, code)
/* '*' — star prefix (AT&T only).  */
6847 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&' — name of some in-use local-dynamic TLS symbol.  */
6852 	  assemble_name (file, get_some_local_dynamic_name ());
/* 'A' — absolute memory reference.  */
6856 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6858 	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
6860 	      /* Intel syntax.  For absolute addresses, registers should not
6861 		 be surrounded by braces.  */
6862 	      if (GET_CODE (x) != REG)
6865 		  PRINT_OPERAND (file, x, 0);
6873 	  PRINT_OPERAND (file, x, 0);
/* L/W/B/Q/S/T — explicit size suffixes, emitted only for AT&T syntax.  */
6878 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6883 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6888 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6893 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6898 	  if (ASSEMBLER_DIALECT == ASM_ATT)
6903 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z' — opcode suffix derived from the operand's mode size.  */
6908 	  /* 387 opcodes don't get size suffixes if the operands are
6910 	  if (STACK_REG_P (x))
6913 	  /* Likewise if using Intel opcodes.  */
6914 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
6917 	  /* This is the size of op from size of operand.  */
6918 	  switch (GET_MODE_SIZE (GET_MODE (x)))
6921 #ifdef HAVE_GAS_FILDS_FISTS
6927 	      if (GET_MODE (x) == SFmode)
6942 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6944 #ifdef GAS_MNEMONICS
/* 's' — shift-double count plus assembler separator.  */
6970 	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6972 	      PRINT_OPERAND (file, x, 0);
/* 'D' — SSE comparison name for the rtx comparison code.  */
6978 	  /* Little bit of braindamage here.  The SSE compare instructions
6979 	     does use completely different names for the comparisons that the
6980 	     fp conditional moves.  */
6981 	  switch (GET_CODE (x))
6996 	      fputs ("unord", file);
7000 	      fputs ("neq", file);
7004 	      fputs ("nlt", file);
7008 	      fputs ("nle", file);
7011 	      fputs ("ord", file);
/* 'O' / 'C' / 'F' / 'c' / 'f' — condition-code suffixes (Sun-as size
   prefix, int and fp flavors, plain and reversed).  */
7019 #ifdef CMOV_SUN_AS_SYNTAX
7020 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7022 	      switch (GET_MODE (x))
7024 		case HImode: putc ('w', file); break;
7026 		case SFmode: putc ('l', file); break;
7028 		case DFmode: putc ('q', file); break;
7036 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7039 #ifdef CMOV_SUN_AS_SYNTAX
7040 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7043 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7046 	  /* Like above, but reverse condition */
7048 	  /* Check to see if argument to %c is really a constant
7049 	     and not a condition code which needs to be reversed.  */
7050 	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7052 	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7055 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7058 #ifdef CMOV_SUN_AS_SYNTAX
7059 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7062 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* '+' (presumably — case label sampled out): branch prediction hint
   prefixes based on the REG_BR_PROB note.  */
7068 	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7071 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7074 		int pred_val = INTVAL (XEXP (x, 0));
7076 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7077 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7079 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7080 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7082 		    /* Emit hints only in the case default branch prediction
7083 		       heuristics would fail.  */
7084 		    if (taken != cputaken)
7086 			/* We use 3e (DS) prefix for taken branches and
7087 			   2e (CS) prefix for not taken branches.  */
7089 			  fputs ("ds ; ", file);
7091 			  fputs ("cs ; ", file);
7098 	  output_operand_lossage ("invalid operand code `%c'", code);
/* After the code switch: print the operand itself by kind.  */
7102   if (GET_CODE (x) == REG)
7104       PRINT_REG (x, code, file);
7107   else if (GET_CODE (x) == MEM)
7109       /* No `byte ptr' prefix for call instructions.  */
7110       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7113 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7115 	    case 1: size = "BYTE"; break;
7116 	    case 2: size = "WORD"; break;
7117 	    case 4: size = "DWORD"; break;
7118 	    case 8: size = "QWORD"; break;
7119 	    case 12: size = "XWORD"; break;
7120 	    case 16: size = "XMMWORD"; break;
7125 	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
7128 	  else if (code == 'w')
7130 	  else if (code == 'k')
7134 	  fputs (" PTR ", file);
7138       if (flag_pic && CONSTANT_ADDRESS_P (x))
7139 	output_pic_addr_const (file, x, code);
7140       /* Avoid (%rip) for call operands.  */
7141       else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7142 	       && GET_CODE (x) != CONST_INT)
7143 	output_addr_const (file, x);
7144       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7145 	output_operand_lossage ("invalid constraints for operand");
7150   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7155       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7156       REAL_VALUE_TO_TARGET_SINGLE (r, l);
7158       if (ASSEMBLER_DIALECT == ASM_ATT)
7160       fprintf (file, "0x%lx", l);
7163   /* These float cases don't actually occur as immediate operands.  */
7164   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7168       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7169       fprintf (file, "%s", dstr);
7172   else if (GET_CODE (x) == CONST_DOUBLE
7173 	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7177       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7178       fprintf (file, "%s", dstr);
/* Fallback: immediates and symbolic constants, with '$' / OFFSET FLAT:
   decoration per dialect.  */
7185       if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7187 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7190       else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7191 	       || GET_CODE (x) == LABEL_REF)
7193 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7196 	    fputs ("OFFSET FLAT:", file);
7199       if (GET_CODE (x) == CONST_INT)
7200 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7202 	output_pic_addr_const (file, x, code);
7204 	output_addr_const (file, x);
7208 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): sampled listing; comments only.  Prints the memory address
   ADDR to FILE, decomposed into base/index/displacement/scale, in the
   current assembler dialect; handles the thread-pointer UNSPEC specially.  */
7211 print_operand_address (file, addr)
7215   struct ix86_address parts;
7216   rtx base, index, disp;
/* Thread-pointer access prints as an %fs:0 / %gs:0 segment reference.  */
7219   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7221       if (ASSEMBLER_DIALECT == ASM_INTEL)
7222 	fputs ("DWORD PTR ", file);
7223       if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7226 	fputs ("fs:0", file);
7228 	fputs ("gs:0", file);
7232   if (! ix86_decompose_address (addr, &parts))
7236   index = parts.index;
7238   scale = parts.scale;
7240   if (!base && !index)
7242       /* Displacement only requires special attention.  */
7244       if (GET_CODE (disp) == CONST_INT)
7246 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7248 	      if (USER_LABEL_PREFIX[0] == 0)
7250 	      fputs ("ds:", file);
7252 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7255 	output_pic_addr_const (file, addr, 0);
7257 	output_addr_const (file, addr);
7259       /* Use one byte shorter RIP relative addressing for 64bit mode.  */
7261 	  && ((GET_CODE (addr) == SYMBOL_REF
7262 	       && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7263 	      || GET_CODE (addr) == LABEL_REF
7264 	      || (GET_CODE (addr) == CONST
7265 		  && GET_CODE (XEXP (addr, 0)) == PLUS
7266 		  && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7267 		      || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7268 		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7269 	fputs ("(%rip)", file);
/* AT&T form: disp(base,index,scale).  */
7273       if (ASSEMBLER_DIALECT == ASM_ATT)
7278 	    output_pic_addr_const (file, disp, 0);
7279 	  else if (GET_CODE (disp) == LABEL_REF)
7280 	    output_asm_label (disp);
7282 	    output_addr_const (file, disp);
7287 	    PRINT_REG (base, 0, file);
7291 	      PRINT_REG (index, 0, file);
7293 	      fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp], offset split from the symbol.  */
7299 	  rtx offset = NULL_RTX;
7303 	      /* Pull out the offset of a symbol; print any symbol itself.  */
7304 	      if (GET_CODE (disp) == CONST
7305 		  && GET_CODE (XEXP (disp, 0)) == PLUS
7306 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7308 		  offset = XEXP (XEXP (disp, 0), 1);
7309 		  disp = gen_rtx_CONST (VOIDmode,
7310 					XEXP (XEXP (disp, 0), 0));
7314 		output_pic_addr_const (file, disp, 0);
7315 	      else if (GET_CODE (disp) == LABEL_REF)
7316 		output_asm_label (disp);
7317 	      else if (GET_CODE (disp) == CONST_INT)
7320 		output_addr_const (file, disp);
7326 	      PRINT_REG (base, 0, file);
7329 		  if (INTVAL (offset) >= 0)
7331 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7335 		fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7342 	      PRINT_REG (index, 0, file);
7344 		fprintf (file, "*%d", scale);
/* NOTE(review): sampled listing; comments only.  Target hook: prints the
   TLS-relocation UNSPEC wrappers (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF,
   @GOTNTPOFF, @INDNTPOFF) that output_addr_const cannot handle itself;
   returns false (return lines sampled out) for other rtx codes.  */
7352 output_addr_const_extra (file, x)
7358   if (GET_CODE (x) != UNSPEC)
7361   op = XVECEXP (x, 0, 0);
7362   switch (XINT (x, 1))
7364     case UNSPEC_GOTTPOFF:
7365       output_addr_const (file, op);
7366       /* FIXME: This might be @TPOFF in Sun ld.  */
7367       fputs ("@GOTTPOFF", file);
7370       output_addr_const (file, op);
7371       fputs ("@TPOFF", file);
7374       output_addr_const (file, op);
7376 	fputs ("@TPOFF", file);
7378 	fputs ("@NTPOFF", file);
7381       output_addr_const (file, op);
7382       fputs ("@DTPOFF", file);
7384     case UNSPEC_GOTNTPOFF:
7385       output_addr_const (file, op);
7387 	fputs ("@GOTTPOFF(%rip)", file);
7389 	fputs ("@GOTNTPOFF", file);
7391     case UNSPEC_INDNTPOFF:
7392       output_addr_const (file, op);
7393       fputs ("@INDNTPOFF", file);
7403 /* Split one or more DImode RTL references into pairs of SImode
7404 references. The RTL can be REG, offsettable MEM, integer constant, or
7405 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7406 split and "num" is its length. lo_half and hi_half are output arrays
7407 that parallel "operands". */
/* NOTE(review): sampled listing; comments only.  Splits each DImode operand
   into SImode low/high halves (see the original comment above); MEMs are
   split by address adjustment, everything else via simplify_gen_subreg.  */
7410 split_di (operands, num, lo_half, hi_half)
7413      rtx lo_half[], hi_half[];
7417       rtx op = operands[num];
7419       /* simplify_subreg refuses to split volatile memory addresses,
7420 	 but we still have to handle it.  */
7421       if (GET_CODE (op) == MEM)
7423 	  lo_half[num] = adjust_address (op, SImode, 0);
7424 	  hi_half[num] = adjust_address (op, SImode, 4);
7428 	  lo_half[num] = simplify_gen_subreg (SImode, op,
7429 					      GET_MODE (op) == VOIDmode
7430 					      ? DImode : GET_MODE (op), 0);
7431 	  hi_half[num] = simplify_gen_subreg (SImode, op,
7432 					      GET_MODE (op) == VOIDmode
7433 					      ? DImode : GET_MODE (op), 4);
7437 /* Split one or more TImode RTL references into pairs of SImode
7438 references. The RTL can be REG, offsettable MEM, integer constant, or
7439 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7440 split and "num" is its length. lo_half and hi_half are output arrays
7441 that parallel "operands". */
/* NOTE(review): sampled listing; comments only.  TImode analogue of
   split_di: splits each operand into DImode low/high halves at byte
   offsets 0 and 8.  */
7444 split_ti (operands, num, lo_half, hi_half)
7447      rtx lo_half[], hi_half[];
7451       rtx op = operands[num];
7453       /* simplify_subreg refuses to split volatile memory addresses, but we
7454 	 still have to handle it.  */
7455       if (GET_CODE (op) == MEM)
7457 	  lo_half[num] = adjust_address (op, DImode, 0);
7458 	  hi_half[num] = adjust_address (op, DImode, 8);
7462 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7463 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7468 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7469 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7470 is the expression of the binary operation. The output may either be
7471 emitted here, or returned to the caller, like all output_* functions.
7473 There is no guarantee that the operands are the same mode, as they
7474 might be within FLOAT or FLOAT_EXTEND expressions. */
7476 #ifndef SYSV386_COMPAT
7477 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7478 wants to fix the assemblers because that causes incompatibility
7479 with gcc. No-one wants to fix gcc because that causes
7480 incompatibility with assemblers... You can use the option of
7481 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7482 #define SYSV386_COMPAT 1
/* NOTE(review): sampled listing — the opcode-name assignments ("fadd",
   "fsub", ...) into BUF and several braces/breaks are missing; comments
   only.  Builds the assembler template for a 387 (or SSE) binary fp op
   (see the original comment above); P selects the operand/suffix variant
   that is concatenated onto BUF.  */
7486 output_387_binary_op (insn, operands)
7490   static char buf[30];
7493   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7495 #ifdef ENABLE_CHECKING
7496   /* Even if we do not want to check the inputs, this documents input
7497      constraints.  Which helps in understanding the following code.  */
7498   if (STACK_REG_P (operands[0])
7499       && ((REG_P (operands[1])
7500 	   && REGNO (operands[0]) == REGNO (operands[1])
7501 	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7502 	  || (REG_P (operands[2])
7503 	      && REGNO (operands[0]) == REGNO (operands[2])
7504 	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7505       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First switch: pick the base mnemonic; integer-mode operands select the
   fi* forms (assignments sampled out).  */
7511   switch (GET_CODE (operands[3]))
7514       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7515 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7523       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7524 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7532       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7533 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7541       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7542 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: append the scalar-single/scalar-double operand template.  */
7556       if (GET_MODE (operands[0]) == SFmode)
7557 	strcat (buf, "ss\t{%2, %0|%0, %2}");
7559 	strcat (buf, "sd\t{%2, %0|%0, %2}");
7564   switch (GET_CODE (operands[3]))
/* Commutative ops (MULT/PLUS): may swap operands 1 and 2 freely.  */
7568       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7570 	  rtx temp = operands[2];
7571 	  operands[2] = operands[1];
7575       /* know operands[0] == operands[1].  */
7577       if (GET_CODE (operands[2]) == MEM)
7583       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7585 	  if (STACK_TOP_P (operands[0]))
7586 	    /* How is it that we are storing to a dead operand[2]?
7587 	       Well, presumably operands[1] is dead too.  We can't
7588 	       store the result to st(0) as st(0) gets popped on this
7589 	       instruction.  Instead store to operands[2] (which I
7590 	       think has to be st(1)).  st(1) will be popped later.
7591 	       gcc <= 2.8.1 didn't have this check and generated
7592 	       assembly code that the Unixware assembler rejected.  */
7593 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7595 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7599       if (STACK_TOP_P (operands[0]))
7600 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7602 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): direction matters, and SYSV386_COMPAT
   assemblers reverse fsub{r}/fdiv{r} when the destination is not st(0).  */
7607       if (GET_CODE (operands[1]) == MEM)
7613       if (GET_CODE (operands[2]) == MEM)
7619       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7622 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7623 	     derived assemblers, confusingly reverse the direction of
7624 	     the operation for fsub{r} and fdiv{r} when the
7625 	     destination register is not st(0).  The Intel assembler
7626 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
7627 	     figure out what the hardware really does.  */
7628 	  if (STACK_TOP_P (operands[0]))
7629 	    p = "{p\t%0, %2|rp\t%2, %0}";
7631 	    p = "{rp\t%2, %0|p\t%0, %2}";
7633 	  if (STACK_TOP_P (operands[0]))
7634 	    /* As above for fmul/fadd, we can't store to st(0).  */
7635 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7637 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7642       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7645 	  if (STACK_TOP_P (operands[0]))
7646 	    p = "{rp\t%0, %1|p\t%1, %0}";
7648 	    p = "{p\t%1, %0|rp\t%0, %1}";
7650 	  if (STACK_TOP_P (operands[0]))
7651 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
7653 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
7658       if (STACK_TOP_P (operands[0]))
7660 	  if (STACK_TOP_P (operands[1]))
7661 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7663 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
7666       else if (STACK_TOP_P (operands[1]))
7669 	    p = "{\t%1, %0|r\t%0, %1}";
7671 	    p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
7677 	    p = "{r\t%2, %0|\t%0, %2}";
7679 	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7692 /* Output code to initialize control word copies used by
7693 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7694 is set to control word rounding downwards. */
/* NOTE(review): sampled listing; comments only.  Per the original comment
   above: stores the current 387 control word into NORMAL and a copy with
   round-toward-zero bits (0xc00) set into ROUND_DOWN; uses the insv form
   when partial-register stalls are not a concern.  */
7696 emit_i387_cw_initialization (normal, round_down)
7697      rtx normal, round_down;
7699   rtx reg = gen_reg_rtx (HImode);
7701   emit_insn (gen_x86_fnstcw_1 (normal));
7702   emit_move_insn (reg, normal);
7703   if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7705     emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7707     emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7708   emit_move_insn (round_down, reg);
7711 /* Output code for INSN to convert a float to a signed int. OPERANDS
7712 are the insn operands. The output may be [HSD]Imode and the input
7713 operand may be [SDX]Fmode. */
/* NOTE(review): sampled listing; comments only.  Per the original comment
   above: emits the float→signed-int truncation sequence, switching the
   control word to round-down around a fist/fistp; DImode needs an extra
   fld because there is no non-popping 64-bit store.  */
7716 output_fix_trunc (insn, operands)
7720   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7721   int dimode_p = GET_MODE (operands[0]) == DImode;
7723   /* Jump through a hoop or two for DImode, since the hardware has no
7724      non-popping instruction.  We used to do this a different way, but
7725      that was somewhat fragile and broke with post-reload splitters.  */
7726   if (dimode_p && !stack_top_dies)
7727     output_asm_insn ("fld\t%y1", operands);
7729   if (!STACK_TOP_P (operands[1]))
7732   if (GET_CODE (operands[0]) != MEM)
7735   output_asm_insn ("fldcw\t%3", operands);
7736   if (stack_top_dies || dimode_p)
7737     output_asm_insn ("fistp%z0\t%0", operands);
7739     output_asm_insn ("fist%z0\t%0", operands);
7740   output_asm_insn ("fldcw\t%2", operands);
7745 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7746 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7747 when fucom should be used. */
/* NOTE(review): sampled listing; comments only.  Per the original comment
   above: emits the fp compare for INSN — EFLAGS_P selects fcomi (1) vs
   fnstsw (2), UNORDERED_P selects the fucom family; SSE operands use
   [u]comiss/[u]comisd.  */
7750 output_fp_compare (insn, operands, eflags_p, unordered_p)
7753      int eflags_p, unordered_p;
7756   rtx cmp_op0 = operands[0];
7757   rtx cmp_op1 = operands[1];
7758   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7763       cmp_op1 = operands[2];
7767       if (GET_MODE (operands[0]) == SFmode)
7769 	  return "ucomiss\t{%1, %0|%0, %1}";
7771 	  return "comiss\t{%1, %0|%0, %y}";
7774 	  return "ucomisd\t{%1, %0|%0, %1}";
7776 	  return "comisd\t{%1, %0|%0, %y}";
7779   if (! STACK_TOP_P (cmp_op0))
7782   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7784   if (STACK_REG_P (cmp_op1)
7786       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7787       && REGNO (cmp_op1) != FIRST_STACK_REG)
7789       /* If both the top of the 387 stack dies, and the other operand
7790 	 is also a stack register that dies, then this must be a
7791 	 `fcompp' float compare */
7795 	  /* There is no double popping fcomi variant.  Fortunately,
7796 	     eflags is immune from the fstp's cc clobbering.  */
7798 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7800 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7808 	    return "fucompp\n\tfnstsw\t%0";
7810 	    return "fcompp\n\tfnstsw\t%0";
7823       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
7825       static const char * const alt[24] =
7837 	"fcomi\t{%y1, %0|%0, %y1}",
7838 	"fcomip\t{%y1, %0|%0, %y1}",
7839 	"fucomi\t{%y1, %0|%0, %y1}",
7840 	"fucomip\t{%y1, %0|%0, %y1}",
7847 	"fcom%z2\t%y2\n\tfnstsw\t%0",
7848 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
7849 	"fucom%z2\t%y2\n\tfnstsw\t%0",
7850 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
7852 	"ficom%z2\t%y2\n\tfnstsw\t%0",
7853 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into ALT from the encoding documented at 7823.  */
7861       mask  = eflags_p << 3;
7862       mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7863       mask |= unordered_p << 1;
7864       mask |= stack_top_dies;
/* Emit one absolute jump-table element: an ASM_LONG (or, presumably under
   64-bit, ASM_QUAD -- the guard line is elided in this listing) directive
   referencing local label VALUE.  */
7877 ix86_output_addr_vec_elt (file, value)
7881 const char *directive = ASM_LONG;
7886 directive = ASM_QUAD;
7892 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative jump-table element for label VALUE against base REL.
   The flavor depends on assembler/OS capabilities: a plain label difference,
   a @GOTOFF reference when the assembler supports GOTOFF in data, a Mach-O
   picbase-relative form, or a GOT-symbol-relative expression otherwise.
   NOTE(review): interior lines (the condition guarding the first form) are
   elided in this listing.  */
7896 ix86_output_addr_diff_elt (file, value, rel)
7901 fprintf (file, "%s%s%d-%s%d\n",
7902 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7903 else if (HAVE_AS_GOTOFF_IN_DATA)
7904 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7906 else if (TARGET_MACHO)
/* Mach-O: offset from the function's picbase label; the leading character of
   the base name is skipped (the "+ 1").  */
7907 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7908 machopic_function_base_name () + 1);
7911 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7912 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7915 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Clears register DEST.  NOTE(review): interior lines are elided in this
   listing; comments only added.  */
7919 ix86_expand_clear (dest)
7924 /* We play register width games, which are only valid after reload. */
7925 if (!reload_completed)
7928 /* Avoid HImode and its attendant prefix byte. */
7929 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7930 dest = gen_rtx_REG (SImode, REGNO (dest));
7932 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7934 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7935 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* The xor form clobbers the condition codes; hard reg 17 with CCmode here
   matches the FLAGS_REG/CCmode clobbers used elsewhere in this file.  */
7937 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7938 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7944 /* X is an unchanging MEM. If it is a constant pool reference, return
7945 the constant pool rtx, else NULL. */
7948 maybe_get_pool_constant (x)
/* Under 32-bit PIC the pool reference is wrapped as
   (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF)));
   peel those layers off before the SYMBOL_REF test below.
   NOTE(review): early-return lines between the checks are elided in
   this listing.  */
7953 if (flag_pic && ! TARGET_64BIT)
7955 if (GET_CODE (x) != PLUS)
7957 if (XEXP (x, 0) != pic_offset_table_rtx)
7960 if (GET_CODE (x) != CONST)
7963 if (GET_CODE (x) != UNSPEC)
7965 if (XINT (x, 1) != UNSPEC_GOTOFF)
7967 x = XVECEXP (x, 0, 0);
7970 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7971 return get_pool_constant (x);
/* Expand a scalar move of mode MODE between OPERANDS[0] and OPERANDS[1],
   legitimizing TLS and PIC symbolic addresses and forcing awkward operands
   into registers/memory as required.  NOTE(review): this listing elides
   interior lines; comments only added.  */
7977 ix86_expand_move (mode, operands)
7978 enum machine_mode mode;
7981 int strict = (reload_in_progress || reload_completed);
7982 rtx insn, op0, op1, tmp;
/* TLS symbols must go through legitimize_address first.  */
7987 if (tls_symbolic_operand (op1, Pmode))
7989 op1 = legitimize_address (op1, op1, VOIDmode);
7990 if (GET_CODE (op0) == MEM)
7992 tmp = gen_reg_rtx (mode);
7993 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7997 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
/* Darwin (Mach-O) PIC handling: indirect the data reference through a
   temporary register when one is available.  */
8002 rtx temp = ((reload_in_progress
8003 || ((op0 && GET_CODE (op0) == REG)
8005 ? op0 : gen_reg_rtx (Pmode));
8006 op1 = machopic_indirect_data_reference (op1, temp);
8007 op1 = machopic_legitimize_pic_address (op1, mode,
8008 temp == op1 ? 0 : temp);
8012 if (MACHOPIC_INDIRECT)
8013 op1 = machopic_indirect_data_reference (op1, 0);
8017 insn = gen_rtx_SET (VOIDmode, op0, op1);
8021 #endif /* TARGET_MACHO */
8022 if (GET_CODE (op0) == MEM)
8023 op1 = force_reg (Pmode, op1);
8027 if (GET_CODE (temp) != REG)
8028 temp = gen_reg_rtx (Pmode);
8029 temp = legitimize_pic_address (op1, temp);
/* mem->mem moves are not supported; except for a plain push, copy the
   source into a register first.  */
8037 if (GET_CODE (op0) == MEM
8038 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8039 || !push_operand (op0, mode))
8040 && GET_CODE (op1) == MEM)
8041 op1 = force_reg (mode, op1);
8043 if (push_operand (op0, mode)
8044 && ! general_no_elim_operand (op1, mode))
8045 op1 = copy_to_mode_reg (mode, op1);
8047 /* Force large constants in 64bit compilation into register
8048 to get them CSEed. */
8049 if (TARGET_64BIT && mode == DImode
8050 && immediate_operand (op1, mode)
8051 && !x86_64_zero_extended_value (op1)
8052 && !register_operand (op0, mode)
8053 && optimize && !reload_completed && !reload_in_progress)
8054 op1 = copy_to_mode_reg (mode, op1);
8056 if (FLOAT_MODE_P (mode))
8058 /* If we are loading a floating point constant to a register,
8059 force the value to memory now, since we'll get better code
8060 out the back end. */
8064 else if (GET_CODE (op1) == CONST_DOUBLE
8065 && register_operand (op0, mode))
8066 op1 = validize_mem (force_const_mem (mode, op1));
8070 insn = gen_rtx_SET (VOIDmode, op0, op1);
/* Expand a vector-mode move of mode MODE between OPERANDS[0] and
   OPERANDS[1].  NOTE(review): interior lines are elided in this listing.  */
8076 ix86_expand_vector_move (mode, operands)
8077 enum machine_mode mode;
8080 /* Force constants other than zero into memory. We do not know how
8081 the instructions used to build constants modify the upper 64 bits
8082 of the register, once we have that information we may be able
8083 to handle some of them more efficiently. */
8084 if ((reload_in_progress | reload_completed) == 0
8085 && register_operand (operands[0], mode)
8086 && CONSTANT_P (operands[1]))
8087 operands[1] = force_const_mem (mode, operands[1]);
8089 /* Make operand1 a register if it isn't already. */
8091 && !register_operand (operands[0], mode)
8092 && !register_operand (operands[1], mode))
/* Route mem->mem moves through a fresh register.  */
8094 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8095 emit_move_insn (operands[0], temp);
8099 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8102 /* Attempt to expand a binary operator. Make the expansion closer to the
8103 actual machine, then just general_operand, which will allow 3 separate
8104 memory references (one output, two input) in a single insn. */
/* NOTE(review): this listing elides interior lines; comments only added.  */
8107 ix86_expand_binary_operator (code, mode, operands)
8109 enum machine_mode mode;
8112 int matching_memory;
8113 rtx src1, src2, dst, op, clob;
8119 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8120 if (GET_RTX_CLASS (code) == 'c'
8121 && (rtx_equal_p (dst, src2)
8122 || immediate_operand (src1, mode)))
8129 /* If the destination is memory, and we do not have matching source
8130 operands, do things in registers. */
8131 matching_memory = 0;
8132 if (GET_CODE (dst) == MEM)
8134 if (rtx_equal_p (dst, src1))
8135 matching_memory = 1;
8136 else if (GET_RTX_CLASS (code) == 'c'
8137 && rtx_equal_p (dst, src2))
8138 matching_memory = 2;
8140 dst = gen_reg_rtx (mode);
8143 /* Both source operands cannot be in memory. */
8144 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
/* Keep whichever source matches the memory destination; force the other.  */
8146 if (matching_memory != 2)
8147 src2 = force_reg (mode, src2);
8149 src1 = force_reg (mode, src1);
8152 /* If the operation is not commutable, source 1 cannot be a constant
8153 or non-matching memory. */
8154 if ((CONSTANT_P (src1)
8155 || (!matching_memory && GET_CODE (src1) == MEM))
8156 && GET_RTX_CLASS (code) != 'c')
8157 src1 = force_reg (mode, src1);
8159 /* If optimizing, copy to regs to improve CSE */
8160 if (optimize && ! no_new_pseudos)
8162 if (GET_CODE (dst) == MEM)
8163 dst = gen_reg_rtx (mode);
8164 if (GET_CODE (src1) == MEM)
8165 src1 = force_reg (mode, src1);
8166 if (GET_CODE (src2) == MEM)
8167 src2 = force_reg (mode, src2);
8170 /* Emit the instruction. */
8172 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8173 if (reload_in_progress)
8175 /* Reload doesn't know about the flags register, and doesn't know that
8176 it doesn't want to clobber it. We can only do this with PLUS. */
/* Otherwise attach the FLAGS_REG clobber the machine insn needs.  */
8183 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8184 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8187 /* Fix up the destination if needed. */
8188 if (dst != operands[0])
8189 emit_move_insn (operands[0], dst);
8192 /* Return TRUE or FALSE depending on whether the binary operator meets the
8193 appropriate constraints. */
/* NOTE(review): the return statements between the checks are elided in this
   listing; each condition below presumably rejects (returns FALSE) when it
   holds -- confirm against the full source.  */
8196 ix86_binary_operator_ok (code, mode, operands)
8198 enum machine_mode mode ATTRIBUTE_UNUSED;
8201 /* Both source operands cannot be in memory. */
8202 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8204 /* If the operation is not commutable, source 1 cannot be a constant. */
8205 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8207 /* If the destination is memory, we must have a matching source operand. */
8208 if (GET_CODE (operands[0]) == MEM
8209 && ! (rtx_equal_p (operands[0], operands[1])
8210 || (GET_RTX_CLASS (code) == 'c'
8211 && rtx_equal_p (operands[0], operands[2]))))
8213 /* If the operation is not commutable and the source 1 is memory, we must
8214 have a matching destination. */
8215 if (GET_CODE (operands[1]) == MEM
8216 && GET_RTX_CLASS (code) != 'c'
8217 && ! rtx_equal_p (operands[0], operands[1]))
8222 /* Attempt to expand a unary operator. Make the expansion closer to the
8223 actual machine, then just general_operand, which will allow 2 separate
8224 memory references (one output, one input) in a single insn. */
/* NOTE(review): interior lines are elided in this listing; comments only
   added.  Mirrors ix86_expand_binary_operator for one-operand codes.  */
8227 ix86_expand_unary_operator (code, mode, operands)
8229 enum machine_mode mode;
8232 int matching_memory;
8233 rtx src, dst, op, clob;
8238 /* If the destination is memory, and we do not have matching source
8239 operands, do things in registers. */
8240 matching_memory = 0;
8241 if (GET_CODE (dst) == MEM)
8243 if (rtx_equal_p (dst, src))
8244 matching_memory = 1;
8246 dst = gen_reg_rtx (mode);
8249 /* When source operand is memory, destination must match. */
8250 if (!matching_memory && GET_CODE (src) == MEM)
8251 src = force_reg (mode, src);
8253 /* If optimizing, copy to regs to improve CSE */
8254 if (optimize && ! no_new_pseudos)
8256 if (GET_CODE (dst) == MEM)
8257 dst = gen_reg_rtx (mode);
8258 if (GET_CODE (src) == MEM)
8259 src = force_reg (mode, src);
8262 /* Emit the instruction. */
8264 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8265 if (reload_in_progress || code == NOT)
8267 /* Reload doesn't know about the flags register, and doesn't know that
8268 it doesn't want to clobber it. */
/* Other codes clobber flags, so wrap the set in a PARALLEL with the
   FLAGS_REG clobber.  */
8275 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8276 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8279 /* Fix up the destination if needed. */
8280 if (dst != operands[0])
8281 emit_move_insn (operands[0], dst);
8284 /* Return TRUE or FALSE depending on whether the unary operator meets the
8285 appropriate constraints. */
/* NOTE(review): the return statements are elided in this listing.  */
8288 ix86_unary_operator_ok (code, mode, operands)
8289 enum rtx_code code ATTRIBUTE_UNUSED;
8290 enum machine_mode mode ATTRIBUTE_UNUSED;
8291 rtx operands[2] ATTRIBUTE_UNUSED;
8293 /* If one of operands is memory, source and destination must match. */
8294 if ((GET_CODE (operands[0]) == MEM
8295 || GET_CODE (operands[1]) == MEM)
8296 && ! rtx_equal_p (operands[0], operands[1]))
8301 /* Return TRUE or FALSE depending on whether the first SET in INSN
8302 has source and destination with matching CC modes, and that the
8303 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): the per-req_mode acceptance/rejection branches between the
   checks are elided in this listing.  */
8306 ix86_match_ccmode (insn, req_mode)
8308 enum machine_mode req_mode;
8311 enum machine_mode set_mode;
8313 set = PATTERN (insn);
/* A PARALLEL's first element carries the COMPARE set of interest.  */
8314 if (GET_CODE (set) == PARALLEL)
8315 set = XVECEXP (set, 0, 0);
8316 if (GET_CODE (set) != SET)
8318 if (GET_CODE (SET_SRC (set)) != COMPARE)
8321 set_mode = GET_MODE (SET_DEST (set));
8325 if (req_mode != CCNOmode
8326 && (req_mode != CCmode
8327 || XEXP (SET_SRC (set), 1) != const0_rtx))
8331 if (req_mode == CCGCmode)
8335 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8339 if (req_mode == CCZmode)
8349 return (GET_MODE (SET_SRC (set)) == set_mode);
8352 /* Generate insn patterns to do an integer compare of OPERANDS. */
8355 ix86_expand_int_compare (code, op0, op1)
8359 enum machine_mode cmpmode;
/* Pick the least-constrained CC mode that still captures CODE.  */
8362 cmpmode = SELECT_CC_MODE (code, op0, op1);
8363 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8365 /* This is very simple, but making the interface the same as in the
8366 FP case makes the rest of the code easier. */
8367 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8368 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8370 /* Return the test that should be put into the flags user, i.e.
8371 the bcc, scc, or cmov instruction. */
8372 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8375 /* Figure out whether to use ordered or unordered fp comparisons.
8376 Return the appropriate mode to use. */
8379 ix86_fp_compare_mode (code)
8380 enum rtx_code code ATTRIBUTE_UNUSED;
8382 /* ??? In order to make all comparisons reversible, we do all comparisons
8383 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8384 all forms trapping and nontrapping comparisons, we can make inequality
8385 comparisons trapping again, since it results in better code when using
8386 FCOM based compares. */
8387 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode for comparing OP0 against OP1 with CODE.
   FP operands delegate to ix86_fp_compare_mode; integer codes pick the
   least-constrained CC mode that still captures the needed flags.
   NOTE(review): the return statements inside the switch are elided in this
   listing; the case comments document which flags each code needs.  */
8391 ix86_cc_mode (code, op0, op1)
8395 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8396 return ix86_fp_compare_mode (code);
8399 /* Only zero flag is needed. */
8401 case NE: /* ZF!=0 */
8403 /* Codes needing carry flag. */
8404 case GEU: /* CF=0 */
8405 case GTU: /* CF=0 & ZF=0 */
8406 case LTU: /* CF=1 */
8407 case LEU: /* CF=1 | ZF=1 */
8409 /* Codes possibly doable only with sign flag when
8410 comparing against zero. */
8411 case GE: /* SF=OF or SF=0 */
8412 case LT: /* SF<>OF or SF=1 */
8413 if (op1 == const0_rtx)
8416 /* For other cases Carry flag is not required. */
8418 /* Codes doable only with sign flag when comparing
8419 against zero, but we miss jump instruction for it
8420 so we need to use relational tests agains overflow
8421 that thus needs to be zero. */
8422 case GT: /* ZF=0 & SF=OF */
8423 case LE: /* ZF=1 | SF<>OF */
8424 if (op1 == const0_rtx)
8428 /* strcmp pattern do (use flags) and combine may ask us for proper
8437 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8440 ix86_use_fcomi_compare (code)
8441 enum rtx_code code ATTRIBUTE_UNUSED;
8443 enum rtx_code swapped_code = swap_condition (code);
/* Use fcomi when it ties the overall cheapest strategy for either operand
   order (cost metric is instruction count; see the cost helpers below).  */
8444 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8445 || (ix86_fp_comparison_cost (swapped_code)
8446 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8449 /* Swap, force into registers, or otherwise massage the two operands
8450 to a fp comparison. The operands are updated in place; the new
8451 comparison code is returned. */
/* NOTE(review): interior lines are elided in this listing; comments only
   added.  */
8453 static enum rtx_code
8454 ix86_prepare_fp_compare_args (code, pop0, pop1)
8458 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8459 rtx op0 = *pop0, op1 = *pop1;
8460 enum machine_mode op_mode = GET_MODE (op0);
8461 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8463 /* All of the unordered compare instructions only work on registers.
8464 The same is true of the XFmode compare instructions. The same is
8465 true of the fcomi compare instructions. */
8468 && (fpcmp_mode == CCFPUmode
8469 || op_mode == XFmode
8470 || op_mode == TFmode
8471 || ix86_use_fcomi_compare (code)))
8473 op0 = force_reg (op_mode, op0);
8474 op1 = force_reg (op_mode, op1);
8478 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8479 things around if they appear profitable, otherwise force op0
/* Swap when op0 is a non-standard constant, or op0 is MEM and op1 is a
   non-standard constant or MEM too.  */
8482 if (standard_80387_constant_p (op0) == 0
8483 || (GET_CODE (op0) == MEM
8484 && ! (standard_80387_constant_p (op1) == 0
8485 || GET_CODE (op1) == MEM)))
8488 tmp = op0, op0 = op1, op1 = tmp;
8489 code = swap_condition (code);
8492 if (GET_CODE (op0) != REG)
8493 op0 = force_reg (op_mode, op0);
8495 if (CONSTANT_P (op1))
/* Standard 387 constants (loadable via fld1/fldz etc.) go in a register;
   anything else is spilled to the constant pool.  */
8497 if (standard_80387_constant_p (op1))
8498 op1 = force_reg (op_mode, op1);
8500 op1 = validize_mem (force_const_mem (op_mode, op1));
8504 /* Try to rearrange the comparison to make it cheaper. */
8505 if (ix86_fp_comparison_cost (code)
8506 > ix86_fp_comparison_cost (swap_condition (code))
8507 && (GET_CODE (op1) == REG || !no_new_pseudos))
8510 tmp = op0, op0 = op1, op1 = tmp;
8511 code = swap_condition (code);
8512 if (GET_CODE (op0) != REG)
8513 op0 = force_reg (op_mode, op0);
8521 /* Convert comparison codes we use to represent FP comparison to integer
8522 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the entire switch body of this function is elided in this
   listing; only the header is visible.  */
8524 static enum rtx_code
8525 ix86_fp_compare_code_to_integer (code)
8555 /* Split comparison code CODE into comparisons we can do using branch
8556 instructions. BYPASS_CODE is comparison code for branch that will
8557 branch around FIRST_CODE and SECOND_CODE. If some of branches
8558 is not required, set value to NIL.
8559 We never require more than two branches. */
/* NOTE(review): the assignments to *first_code and the NIL defaults between
   the cases are elided in this listing.  */
8561 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8562 enum rtx_code code, *bypass_code, *first_code, *second_code;
8568 /* The fcomi comparison sets flags as follows:
/* Single-branch cases: these map directly onto one flags test.  */
8578 case GT: /* GTU - CF=0 & ZF=0 */
8579 case GE: /* GEU - CF=0 */
8580 case ORDERED: /* PF=0 */
8581 case UNORDERED: /* PF=1 */
8582 case UNEQ: /* EQ - ZF=1 */
8583 case UNLT: /* LTU - CF=1 */
8584 case UNLE: /* LEU - CF=1 | ZF=1 */
8585 case LTGT: /* EQ - ZF=0 */
/* Ordered comparisons that mis-fire on unordered input need either a
   bypass branch (taken when PF says unordered) or an extra second test.  */
8587 case LT: /* LTU - CF=1 - fails on unordered */
8589 *bypass_code = UNORDERED;
8591 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8593 *bypass_code = UNORDERED;
8595 case EQ: /* EQ - ZF=1 - fails on unordered */
8597 *bypass_code = UNORDERED;
8599 case NE: /* NE - ZF=0 - fails on unordered */
8601 *second_code = UNORDERED;
8603 case UNGE: /* GEU - CF=0 - fails on unordered */
8605 *second_code = UNORDERED;
8607 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8609 *second_code = UNORDERED;
/* Without IEEE conformance the unordered fixups can be dropped.  */
8614 if (!TARGET_IEEE_FP)
8621 /* Return cost of comparison done fcom + arithmetics operations on AX.
8622 All following functions do use number of instructions as a cost metrics.
8623 In future this should be tweaked to compute bytes for optimize_size and
8624 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the per-code cost switch is elided in this listing.  */
8626 ix86_fp_comparison_arithmetics_cost (code)
8629 if (!TARGET_IEEE_FP)
8631 /* The cost of code output by ix86_expand_fp_compare. */
8659 /* Return cost of comparison done using fcomi operation.
8660 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8662 ix86_fp_comparison_fcomi_cost (code)
8665 enum rtx_code bypass_code, first_code, second_code;
8666 /* Return arbitrarily high cost when instruction is not supported - this
8667 prevents gcc from using it. */
/* NOTE(review): the TARGET_CMOVE guard and its return are elided here.  */
8670 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 2 (fcomi + jcc), plus 1 when an extra bypass/second branch is
   needed.  */
8671 return (bypass_code != NIL || second_code != NIL) + 2;
8674 /* Return cost of comparison done using sahf operation.
8675 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8677 ix86_fp_comparison_sahf_cost (code)
8680 enum rtx_code bypass_code, first_code, second_code;
8681 /* Return arbitrarily high cost when instruction is not preferred - this
8682 avoids gcc from using it. */
8683 if (!TARGET_USE_SAHF && !optimize_size)
8685 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 3 (fnstsw + sahf + jcc), plus 1 for any extra branch.  */
8686 return (bypass_code != NIL || second_code != NIL) + 3;
8689 /* Compute cost of the comparison done using any method.
8690 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Returns the minimum of the fcomi, sahf and fcom+arithmetic strategies.
   NOTE(review): the min-updating assignments after the comparisons are
   elided in this listing.  */
8692 ix86_fp_comparison_cost (code)
8695 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8698 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8699 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8701 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8702 if (min > sahf_cost)
8704 if (min > fcomi_cost)
8709 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the comparison and returns the rtx test to feed the flags user.
   SECOND_TEST/BYPASS_TEST, when non-null, receive any auxiliary tests
   required for IEEE-correct unordered handling.  NOTE(review): this listing
   elides interior lines (including the case labels of the big switch);
   comments only added.  */
8712 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8714 rtx op0, op1, scratch;
8718 enum machine_mode fpcmp_mode, intcmp_mode;
8720 int cost = ix86_fp_comparison_cost (code);
8721 enum rtx_code bypass_code, first_code, second_code;
8723 fpcmp_mode = ix86_fp_compare_mode (code);
8724 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8727 *second_test = NULL_RTX;
8729 *bypass_test = NULL_RTX;
8731 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8733 /* Do fcomi/sahf based test when profitable. */
8734 if ((bypass_code == NIL || bypass_test)
8735 && (second_code == NIL || second_test)
8736 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi branch: compare straight into the flags register.  */
8740 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8741 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf branch: fnstsw into a scratch HImode reg, then sahf loads AH into
   the flags.  */
8747 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8748 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8750 scratch = gen_reg_rtx (HImode);
8751 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8752 emit_insn (gen_x86_sahf_1 (scratch));
8755 /* The FP codes work out to act like unsigned. */
8756 intcmp_mode = fpcmp_mode;
8758 if (bypass_code != NIL)
8759 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8760 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8762 if (second_code != NIL)
8763 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8764 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8769 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8770 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8771 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8773 scratch = gen_reg_rtx (HImode);
8774 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8776 /* In the unordered case, we have to check C2 for NaN's, which
8777 doesn't happen to work out to anything nice combination-wise.
8778 So do some bit twiddling on the value we've got in AH to come
8779 up with an appropriate set of condition codes. */
8781 intcmp_mode = CCNOmode;
/* The mask constants below (0x45, 0x44, 0x40, 0x05, 0x04, 0x01) select
   the C0/C2/C3 condition bits of the FPU status word as stored in AH.  */
8786 if (code == GT || !TARGET_IEEE_FP)
8788 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8793 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8794 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8795 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8796 intcmp_mode = CCmode;
8802 if (code == LT && TARGET_IEEE_FP)
8804 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8805 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8806 intcmp_mode = CCmode;
8811 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8817 if (code == GE || !TARGET_IEEE_FP)
8819 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8824 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8825 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8832 if (code == LE && TARGET_IEEE_FP)
8834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8835 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8837 intcmp_mode = CCmode;
8842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8848 if (code == EQ && TARGET_IEEE_FP)
8850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8851 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8852 intcmp_mode = CCmode;
8857 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8864 if (code == NE && TARGET_IEEE_FP)
8866 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8867 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8873 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8879 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8883 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8892 /* Return the test that should be put into the flags user, i.e.
8893 the bcc, scc, or cmov instruction. */
8894 return gen_rtx_fmt_ee (code, VOIDmode,
8895 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the globals ix86_compare_op0/ix86_compare_op1
   using CODE, dispatching to the FP or integer expander, and return the
   flags test rtx.  NOTE(review): interior lines are elided here.  */
8900 ix86_expand_compare (code, second_test, bypass_test)
8902 rtx *second_test, *bypass_test;
8905 op0 = ix86_compare_op0;
8906 op1 = ix86_compare_op1;
8909 *second_test = NULL_RTX;
8911 *bypass_test = NULL_RTX;
8913 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8914 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8915 second_test, bypass_test);
8917 ret = ix86_expand_int_compare (code, op0, op1);
8922 /* Return true if the CODE will result in nontrivial jump sequence. */
8924 ix86_fp_jump_nontrivial_p (code)
8927 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial means more than a single conditional branch is required.  */
8930 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8931 return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch to LABEL on comparison CODE of the globals
   ix86_compare_op0/op1.  Dispatches on the operand mode: simple modes get
   a single compare+jump, FP modes may need compound sequences, and DImode
   (on 32-bit) is split into word-sized compares.  NOTE(review): this
   listing elides interior lines (case labels, braces); comments only
   added.  */
8935 ix86_expand_branch (code, label)
8941 switch (GET_MODE (ix86_compare_op0))
/* Simple integer modes: one compare, one conditional jump.  */
8947 tmp = ix86_expand_compare (code, NULL, NULL);
8948 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8949 gen_rtx_LABEL_REF (VOIDmode, label),
8951 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8961 enum rtx_code bypass_code, first_code, second_code;
8963 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8966 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8968 /* Check whether we will use the natural sequence with one jump. If
8969 so, we can expand jump early. Otherwise delay expansion by
8970 creating compound insn to not confuse optimizers. */
8971 if (bypass_code == NIL && second_code == NIL
8974 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8975 gen_rtx_LABEL_REF (VOIDmode, label),
8980 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8981 ix86_compare_op0, ix86_compare_op1);
8982 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8983 gen_rtx_LABEL_REF (VOIDmode, label),
8985 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8987 use_fcomi = ix86_use_fcomi_compare (code);
/* Compound insn: branch plus clobbers of the FP status (reg 18) and flags
   (reg 17) registers; a scratch is added when fnstsw will be needed.  */
8988 vec = rtvec_alloc (3 + !use_fcomi);
8989 RTVEC_ELT (vec, 0) = tmp;
8991 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8993 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8996 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8998 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec))
9006 /* Expand DImode branch into multiple compare+branch. */
9008 rtx lo[2], hi[2], label2;
9009 enum rtx_code code1, code2, code3;
/* Canonicalize: keep any constant as the second operand.  */
9011 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9013 tmp = ix86_compare_op0;
9014 ix86_compare_op0 = ix86_compare_op1;
9015 ix86_compare_op1 = tmp;
9016 code = swap_condition (code);
9018 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9019 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9021 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9022 avoid two branches. This costs one extra insn, so disable when
9023 optimizing for size. */
9025 if ((code == EQ || code == NE)
9027 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9032 if (hi[1] != const0_rtx)
9033 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9034 NULL_RTX, 0, OPTAB_WIDEN);
9037 if (lo[1] != const0_rtx)
9038 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9039 NULL_RTX, 0, OPTAB_WIDEN);
9041 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9042 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the OR result compared against zero.  */
9044 ix86_compare_op0 = tmp;
9045 ix86_compare_op1 = const0_rtx;
9046 ix86_expand_branch (code, label);
9050 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9051 op1 is a constant and the low word is zero, then we can just
9052 examine the high word. */
9054 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9057 case LT: case LTU: case GE: case GEU:
9058 ix86_compare_op0 = hi[0];
9059 ix86_compare_op1 = hi[1];
9060 ix86_expand_branch (code, label);
9066 /* Otherwise, we need two or three jumps. */
9068 label2 = gen_label_rtx ();
9071 code2 = swap_condition (code);
9072 code3 = unsigned_condition (code);
9076 case LT: case GT: case LTU: case GTU:
9079 case LE: code1 = LT; code2 = GT; break;
9080 case GE: code1 = GT; code2 = LT; break;
9081 case LEU: code1 = LTU; code2 = GTU; break;
9082 case GEU: code1 = GTU; code2 = LTU; break;
9084 case EQ: code1 = NIL; code2 = NE; break;
9085 case NE: code2 = NIL; break;
9093 * if (hi(a) < hi(b)) goto true;
9094 * if (hi(a) > hi(b)) goto false;
9095 * if (lo(a) < lo(b)) goto true;
9099 ix86_compare_op0 = hi[0];
9100 ix86_compare_op1 = hi[1];
9103 ix86_expand_branch (code1, label);
9105 ix86_expand_branch (code2, label2);
/* Low words compare unsigned regardless of the original signedness.  */
9107 ix86_compare_op0 = lo[0];
9108 ix86_compare_op1 = lo[1];
9109 ix86_expand_branch (code3, label);
9112 emit_label (label2);
9121 /* Split branch based on floating point condition. */
/* Emits up to three conditional jumps (bypass, main, second) implementing
   CODE on OP1/OP2, targeting TARGET1/TARGET2, attaching REG_BR_PROB notes
   when split_branch_probability is known.  NOTE(review): interior lines are
   elided in this listing.  */
9123 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9125 rtx op1, op2, target1, target2, tmp;
9128 rtx label = NULL_RTX;
9130 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through edge is target2 (pc).  */
9133 if (target2 != pc_rtx)
9136 code = reverse_condition_maybe_unordered (code);
9141 condition = ix86_expand_fp_compare (code, op1, op2,
9142 tmp, &second, &bypass);
9144 if (split_branch_probability >= 0)
9146 /* Distribute the probabilities across the jumps.
9147 Assume the BYPASS and SECOND to be always test
9149 probability = split_branch_probability;
9151 /* Value of 1 is low enough to make no need for probability
9152 to be updated. Later we may run some experiments and see
9153 if unordered values are more frequent in practice. */
9155 bypass_probability = 1;
9157 second_probability = 1;
/* Bypass jump: skips the main test, used for the unordered escape.  */
9159 if (bypass != NULL_RTX)
9161 label = gen_label_rtx ();
9162 i = emit_jump_insn (gen_rtx_SET
9164 gen_rtx_IF_THEN_ELSE (VOIDmode,
9166 gen_rtx_LABEL_REF (VOIDmode,
9169 if (bypass_probability >= 0)
9171 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9172 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9175 i = emit_jump_insn (gen_rtx_SET
9177 gen_rtx_IF_THEN_ELSE (VOIDmode,
9178 condition, target1, target2)));
9179 if (probability >= 0)
9181 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9182 GEN_INT (probability),
/* Optional second jump covering the remaining condition.  */
9184 if (second != NULL_RTX)
9186 i = emit_jump_insn (gen_rtx_SET
9188 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9190 if (second_probability >= 0)
9192 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9193 GEN_INT (second_probability),
9196 if (label != NULL_RTX)
/* Expand a setcc of comparison CODE into QImode register DEST.  Returns 1
   on success (DONE) and 0 on failure (FAIL), as an expander helper.
   NOTE(review): interior lines are elided in this listing.  */
9201 ix86_expand_setcc (code, dest)
9205 rtx ret, tmp, tmpreg;
9206 rtx second_test, bypass_test;
/* 32-bit DImode compares take the multi-branch path and cannot be done
   as a single setcc.  */
9208 if (GET_MODE (ix86_compare_op0) == DImode
9210 return 0; /* FAIL */
9212 if (GET_MODE (dest) != QImode)
9215 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9216 PUT_MODE (ret, QImode);
9221 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9222 if (bypass_test || second_test)
/* IEEE case: combine the auxiliary test with the main one; a bypass test
   is reversed and ANDed in, a second test is ORed in.  */
9224 rtx test = second_test;
9226 rtx tmp2 = gen_reg_rtx (QImode);
9233 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9235 PUT_MODE (test, QImode);
9236 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9239 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9241 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9244 return 1; /* DONE */
9247 /* Expand comparison setting or clearing carry flag. Return true when successful
9248 and set pop for the operation. */
/* Rewrites CODE/OP0/OP1 into an equivalent unsigned compare (LTU/GEU) whose
   result lives entirely in the carry flag, enabling sbb/adc tricks.
   NOTE(review): interior lines are elided in this listing.  */
9250 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9254 enum machine_mode mode =
9255 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9257 /* Do not handle DImode compares that go trought special path. Also we can't
9258 deal with FP compares yet. This is possible to add. */
9259 if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
9267 /* Convert a==0 into (unsigned)a<1. */
9270 if (op1 != const0_rtx)
9273 code = (code == EQ ? LTU : GEU);
9276 /* Convert a>b into b<a or a>=b-1. */
9279 if (GET_CODE (op1) == CONST_INT)
9281 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9282 /* Bail out on overflow. We still can swap operands but that
9283 would force loading of the constant into register. */
9284 if (op1 == const0_rtx
9285 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9287 code = (code == GTU ? GEU : LTU);
9294 code = (code == GTU ? LTU : GEU);
9298 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9301 if (mode == DImode || op1 != const0_rtx)
/* Flipping the sign-bit range turns the signed test into an unsigned one.  */
9303 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9304 code = (code == LT ? GEU : LTU);
9308 if (mode == DImode || op1 != constm1_rtx)
9310 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9311 code = (code == LE ? GEU : LTU);
9317 ix86_compare_op0 = op0;
9318 ix86_compare_op1 = op1;
9319 *pop = ix86_expand_compare (code, NULL, NULL);
/* Sanity: the produced test must be a pure carry-flag test.  */
9320 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3]).  Returns 1 when the expansion is DONE and
   0 on FAIL (caller falls back) -- visible from the return comments.
   NOTE(review): excerpt with fused line numbers and missing lines; only
   visible logic is documented.  */
9326 ix86_expand_int_movcc (operands)
9329 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9330 rtx compare_seq, compare_op;
9331 rtx second_test, bypass_test;
9332 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray second semicolon below is in the original text.  */
9333 bool sign_bit_compare_p = false;;
9336 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9337 compare_seq = get_insns ();
9340 compare_code = GET_CODE (compare_op);
/* Compares against 0/-1 can be done with a sign-bit test (shift-based).  */
9342 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9343 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9344 sign_bit_compare_p = true;
9346 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9347 HImode insns, we'd be swallowed in word prefix ops. */
9349 if ((mode != HImode || TARGET_FAST_PREFIX)
9350 && (mode != DImode || TARGET_64BIT)
9351 && GET_CODE (operands[2]) == CONST_INT
9352 && GET_CODE (operands[3]) == CONST_INT)
9354 rtx out = operands[0];
9355 HOST_WIDE_INT ct = INTVAL (operands[2]);
9356 HOST_WIDE_INT cf = INTVAL (operands[3]);
9360 /* Sign bit compares are better done using shifts than we do by using
9362 if (sign_bit_compare_p
9363 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9364 ix86_compare_op1, &compare_op))
9366 /* Detect overlap between destination and compare sources. */
9369 if (!sign_bit_compare_p)
9371 compare_code = GET_CODE (compare_op);
9373 /* To simplify rest of code, restrict to the GEU case. */
9374 if (compare_code == LTU)
9376 HOST_WIDE_INT tmp = ct;
9379 compare_code = reverse_condition (compare_code);
9380 code = reverse_condition (code);
9384 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9385 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9386 tmp = gen_reg_rtx (mode);
/* Materialize -1/0 from the carry flag (sbb-style).  */
9389 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9391 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
9395 if (code == GT || code == GE)
9396 code = reverse_condition (code);
9399 HOST_WIDE_INT tmp = ct;
9403 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9404 ix86_compare_op1, VOIDmode, 0, -1);
9417 tmp = expand_simple_binop (mode, PLUS,
9419 copy_rtx (tmp), 1, OPTAB_DIRECT);
9430 tmp = expand_simple_binop (mode, IOR,
9432 copy_rtx (tmp), 1, OPTAB_DIRECT);
9434 else if (diff == -1 && ct)
9444 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9446 tmp = expand_simple_binop (mode, PLUS,
9447 copy_rtx (tmp), GEN_INT (cf),
9448 copy_rtx (tmp), 1, OPTAB_DIRECT);
9456 * andl cf - ct, dest
9466 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9469 tmp = expand_simple_binop (mode, AND,
9471 gen_int_mode (cf - ct, mode),
9472 copy_rtx (tmp), 1, OPTAB_DIRECT);
9474 tmp = expand_simple_binop (mode, PLUS,
9475 copy_rtx (tmp), GEN_INT (ct),
9476 copy_rtx (tmp), 1, OPTAB_DIRECT);
9479 if (!rtx_equal_p (tmp, out))
9480 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9482 return 1; /* DONE */
9488 tmp = ct, ct = cf, cf = tmp;
9490 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9492 /* We may be reversing unordered compare to normal compare, that
9493 is not valid in general (we may convert non-trapping condition
9494 to trapping one), however on i386 we currently emit all
9495 comparisons unordered. */
9496 compare_code = reverse_condition_maybe_unordered (compare_code);
9497 code = reverse_condition_maybe_unordered (code);
9501 compare_code = reverse_condition (compare_code);
9502 code = reverse_condition (code);
9507 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9508 && GET_CODE (ix86_compare_op1) == CONST_INT)
9510 if (ix86_compare_op1 == const0_rtx
9511 && (code == LT || code == GE))
9512 compare_code = code;
9513 else if (ix86_compare_op1 == constm1_rtx)
9517 else if (code == GT)
9522 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9523 if (compare_code != NIL
9524 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9525 && (cf == -1 || ct == -1))
9527 /* If lea code below could be used, only optimize
9528 if it results in a 2 insn sequence. */
9530 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9531 || diff == 3 || diff == 5 || diff == 9)
9532 || (compare_code == LT && ct == -1)
9533 || (compare_code == GE && cf == -1))
9536 * notl op1 (if necessary)
9544 code = reverse_condition (code);
9547 out = emit_store_flag (out, code, ix86_compare_op0,
9548 ix86_compare_op1, VOIDmode, 0, -1);
9550 out = expand_simple_binop (mode, IOR,
9552 out, 1, OPTAB_DIRECT);
9553 if (out != operands[0])
9554 emit_move_insn (operands[0], out);
9556 return 1; /* DONE */
/* diff matches an lea scale (1,2,3,4,5,8,9): setcc + lea sequence.  */
9561 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9562 || diff == 3 || diff == 5 || diff == 9)
9563 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9564 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9570 * lea cf(dest*(ct-cf)),dest
9574 * This also catches the degenerate setcc-only case.
9580 out = emit_store_flag (out, code, ix86_compare_op0,
9581 ix86_compare_op1, VOIDmode, 0, 1);
9584 /* On x86_64 the lea instruction operates on Pmode, so we need
9585 to get arithmetics done in proper mode to match. */
9587 tmp = copy_rtx (out);
9591 out1 = copy_rtx (out);
9592 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9596 tmp = gen_rtx_PLUS (mode, tmp, out1);
9602 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9605 if (!rtx_equal_p (tmp, out))
9608 out = force_operand (tmp, out);
9610 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9612 if (!rtx_equal_p (out, operands[0]))
9613 emit_move_insn (operands[0], copy_rtx (out));
9615 return 1; /* DONE */
9619 * General case: Jumpful:
9620 * xorl dest,dest cmpl op1, op2
9621 * cmpl op1, op2 movl ct, dest
9623 * decl dest movl cf, dest
9624 * andl (cf-ct),dest 1:
9629 * This is reasonably steep, but branch mispredict costs are
9630 * high on modern cpus, so consider failing only if optimizing
9634 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9635 && BRANCH_COST >= 2)
9641 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9642 /* We may be reversing unordered compare to normal compare,
9643 that is not valid in general (we may convert non-trapping
9644 condition to trapping one), however on i386 we currently
9645 emit all comparisons unordered. */
9646 code = reverse_condition_maybe_unordered (code);
9649 code = reverse_condition (code);
9650 if (compare_code != NIL)
9651 compare_code = reverse_condition (compare_code);
9655 if (compare_code != NIL)
9657 /* notl op1 (if needed)
9662 For x < 0 (resp. x <= -1) there will be no notl,
9663 so if possible swap the constants to get rid of the
9665 True/false will be -1/0 while code below (store flag
9666 followed by decrement) is 0/-1, so the constants need
9667 to be exchanged once more. */
9669 if (compare_code == GE || !cf)
9671 code = reverse_condition (code);
9676 HOST_WIDE_INT tmp = cf;
9681 out = emit_store_flag (out, code, ix86_compare_op0,
9682 ix86_compare_op1, VOIDmode, 0, -1);
9686 out = emit_store_flag (out, code, ix86_compare_op0,
9687 ix86_compare_op1, VOIDmode, 0, 1);
9689 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9690 copy_rtx (out), 1, OPTAB_DIRECT);
9693 out = expand_simple_binop (mode, AND, copy_rtx (out),
9694 gen_int_mode (cf - ct, mode),
9695 copy_rtx (out), 1, OPTAB_DIRECT);
9697 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9698 copy_rtx (out), 1, OPTAB_DIRECT);
9699 if (!rtx_equal_p (out, operands[0]))
9700 emit_move_insn (operands[0], copy_rtx (out));
9702 return 1; /* DONE */
9706 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9708 /* Try a few things more with specific constants and a variable. */
9711 rtx var, orig_out, out, tmp;
9713 if (BRANCH_COST <= 2)
9714 return 0; /* FAIL */
9716 /* If one of the two operands is an interesting constant, load a
9717 constant with the above and mask it in with a logical operation. */
9719 if (GET_CODE (operands[2]) == CONST_INT)
9722 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9723 operands[3] = constm1_rtx, op = and_optab;
9724 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9725 operands[3] = const0_rtx, op = ior_optab;
9727 return 0; /* FAIL */
9729 else if (GET_CODE (operands[3]) == CONST_INT)
9732 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9733 operands[2] = constm1_rtx, op = and_optab;
9734 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9735 operands[2] = const0_rtx, op = ior_optab;
9737 return 0; /* FAIL */
9740 return 0; /* FAIL */
9742 orig_out = operands[0];
9743 tmp = gen_reg_rtx (mode);
9746 /* Recurse to get the constant loaded. */
9747 if (ix86_expand_int_movcc (operands) == 0)
9748 return 0; /* FAIL */
9750 /* Mask in the interesting variable. */
9751 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9753 if (!rtx_equal_p (out, orig_out))
9754 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9756 return 1; /* DONE */
9760 * For comparison with above,
/* Fallback: real cmov.  Force operands into registers as required.  */
9770 if (! nonimmediate_operand (operands[2], mode))
9771 operands[2] = force_reg (mode, operands[2]);
9772 if (! nonimmediate_operand (operands[3], mode))
9773 operands[3] = force_reg (mode, operands[3]);
9775 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9777 rtx tmp = gen_reg_rtx (mode);
9778 emit_move_insn (tmp, operands[3]);
9781 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9783 rtx tmp = gen_reg_rtx (mode);
9784 emit_move_insn (tmp, operands[2]);
9788 if (! register_operand (operands[2], VOIDmode)
9790 || ! register_operand (operands[3], VOIDmode)))
9791 operands[2] = force_reg (mode, operands[2]);
9794 && ! register_operand (operands[3], VOIDmode))
9795 operands[3] = force_reg (mode, operands[3]);
9797 emit_insn (compare_seq);
9798 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9799 gen_rtx_IF_THEN_ELSE (mode,
9800 compare_op, operands[2],
/* Extra cmovs for the second/bypass tests produced by the FP compare.  */
9803 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9804 gen_rtx_IF_THEN_ELSE (mode,
9806 copy_rtx (operands[3]),
9807 copy_rtx (operands[0]))));
9809 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9810 gen_rtx_IF_THEN_ELSE (mode,
9812 copy_rtx (operands[2]),
9813 copy_rtx (operands[0]))));
9815 return 1; /* DONE */
/* Expand a floating-point conditional move.  Tries SSE min/max first,
   then the SSE movcc patterns, then falls back to fcmov.  NOTE(review):
   excerpt with fused line numbers and missing lines; only visible logic
   is documented.  */
9819 ix86_expand_fp_movcc (operands)
9824 rtx compare_op, second_test, bypass_test;
9826 /* For SF/DFmode conditional moves based on comparisons
9827 in same mode, we may want to use SSE min/max instructions. */
9828 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9829 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9830 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9831 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9833 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9834 /* We may be called from the post-reload splitter. */
9835 && (!REG_P (operands[0])
9836 || SSE_REG_P (operands[0])
9837 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9839 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9840 code = GET_CODE (operands[1]);
9842 /* See if we have (cross) match between comparison operands and
9843 conditional move operands. */
9844 if (rtx_equal_p (operands[2], op1))
9849 code = reverse_condition_maybe_unordered (code);
9851 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9853 /* Check for min operation. */
9854 if (code == LT || code == UNLE)
9862 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9863 if (memory_operand (op0, VOIDmode))
9864 op0 = force_reg (GET_MODE (operands[0]), op0);
9865 if (GET_MODE (operands[0]) == SFmode)
9866 emit_insn (gen_minsf3 (operands[0], op0, op1));
9868 emit_insn (gen_mindf3 (operands[0], op0, op1));
9871 /* Check for max operation. */
9872 if (code == GT || code == UNGE)
9880 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9881 if (memory_operand (op0, VOIDmode))
9882 op0 = force_reg (GET_MODE (operands[0]), op0);
9883 if (GET_MODE (operands[0]) == SFmode)
9884 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9886 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9890 /* Manage condition to be sse_comparison_operator. In case we are
9891 in non-ieee mode, try to canonicalize the destination operand
9892 to be first in the comparison - this helps reload to avoid extra
9894 if (!sse_comparison_operator (operands[1], VOIDmode)
9895 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9897 rtx tmp = ix86_compare_op0;
9898 ix86_compare_op0 = ix86_compare_op1;
9899 ix86_compare_op1 = tmp;
9900 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9901 VOIDmode, ix86_compare_op0,
9904 /* Similarly try to manage result to be first operand of conditional
9905 move. We also don't support the NE comparison on SSE, so try to
9907 if ((rtx_equal_p (operands[0], operands[3])
9908 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9909 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9911 rtx tmp = operands[2];
9912 operands[2] = operands[3];
9914 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9915 (GET_CODE (operands[1])),
9916 VOIDmode, ix86_compare_op0,
9919 if (GET_MODE (operands[0]) == SFmode)
9920 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9921 operands[2], operands[3],
9922 ix86_compare_op0, ix86_compare_op1));
9924 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9925 operands[2], operands[3],
9926 ix86_compare_op0, ix86_compare_op1));
9930 /* The floating point conditional move instructions don't directly
9931 support conditions resulting from a signed integer comparison. */
9933 code = GET_CODE (operands[1]);
9934 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9936 /* The floating point conditional move instructions don't directly
9937 support signed integer comparisons. */
/* Reduce an unsupported condition to a setcc result compared with 0.  */
9939 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9941 if (second_test != NULL || bypass_test != NULL)
9943 tmp = gen_reg_rtx (QImode);
9944 ix86_expand_setcc (code, tmp);
9946 ix86_compare_op0 = tmp;
9947 ix86_compare_op1 = const0_rtx;
9948 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9950 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9952 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9953 emit_move_insn (tmp, operands[3]);
9956 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9958 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9959 emit_move_insn (tmp, operands[2]);
9963 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9964 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9969 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9970 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9975 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9976 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9984 /* Expand conditional increment or decrement using adb/sbb instructions.
9985 The default case using setcc followed by the conditional move can be
9986 done by generic code. */
/* NOTE(review): excerpt with fused line numbers and missing lines.
   Only handles operands[3] of +/-1; relies on
   ix86_expand_carry_flag_compare to put the condition in the carry flag,
   then emits an add/sub-with-carry in the destination mode.  */
9988 ix86_expand_int_addcc (operands)
9991 enum rtx_code code = GET_CODE (operands[1]);
9993 rtx val = const0_rtx;
9995 if (operands[3] != const1_rtx
9996 && operands[3] != constm1_rtx)
9998 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9999 ix86_compare_op1, &compare_op))
10001 if (GET_CODE (compare_op) != LTU)
10002 val = operands[3] == const1_rtx ? constm1_rtx : const1_rtx;
10003 if ((GET_CODE (compare_op) == LTU) == (operands[3] == constm1_rtx))
/* Subtract-with-borrow path, per destination mode.  */
10005 switch (GET_MODE (operands[0]))
10008 emit_insn (gen_subqi3_carry (operands[0], operands[2], val));
10011 emit_insn (gen_subhi3_carry (operands[0], operands[2], val));
10014 emit_insn (gen_subsi3_carry (operands[0], operands[2], val));
10017 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val));
/* Add-with-carry path, per destination mode.  */
10025 switch (GET_MODE (operands[0]))
10028 emit_insn (gen_addqi3_carry (operands[0], operands[2], val));
10031 emit_insn (gen_addhi3_carry (operands[0], operands[2], val));
10034 emit_insn (gen_addsi3_carry (operands[0], operands[2], val));
10037 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val));
10043 return 1; /* DONE */
10047 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10048 works for floating pointer parameters and nonoffsetable memories.
10049 For pushes, it returns just stack offsets; the values will be saved
10050 in the right order. Maximally three parts are generated. */
/* NOTE(review): excerpt with fused line numbers and missing lines;
   presumably returns the number of parts (2 or 3) -- TODO confirm.  */
10053 ix86_split_to_parts (operand, parts, mode)
10056 enum machine_mode mode;
/* Part count: 32-bit uses 4-byte words (3 for TFmode), 64-bit 8-byte.  */
10061 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10063 size = (GET_MODE_SIZE (mode) + 4) / 8;
10065 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10067 if (size < 2 || size > 3)
10070 /* Optimize constant pool reference to immediates. This is used by fp
10071 moves, that force all constants to memory to allow combining. */
10072 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10074 rtx tmp = maybe_get_pool_constant (operand);
10079 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10081 /* The only non-offsetable memories we handle are pushes. */
10082 if (! push_operand (operand, VOIDmode))
10085 operand = copy_rtx (operand);
10086 PUT_MODE (operand, Pmode);
10087 parts[0] = parts[1] = parts[2] = operand;
10089 else if (!TARGET_64BIT)
10091 if (mode == DImode)
10092 split_di (&operand, 1, &parts[0], &parts[1]);
10095 if (REG_P (operand))
10097 if (!reload_completed)
10099 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10100 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10102 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10104 else if (offsettable_memref_p (operand))
10106 operand = adjust_address (operand, SImode, 0);
10107 parts[0] = operand;
10108 parts[1] = adjust_address (operand, SImode, 4);
10110 parts[2] = adjust_address (operand, SImode, 8);
10112 else if (GET_CODE (operand) == CONST_DOUBLE)
10117 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10122 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10123 parts[2] = gen_int_mode (l[2], SImode);
10126 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10131 parts[1] = gen_int_mode (l[1], SImode);
10132 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: split into DImode (+ SImode tail for XF/TFmode).  */
10140 if (mode == TImode)
10141 split_ti (&operand, 1, &parts[0], &parts[1]);
10142 if (mode == XFmode || mode == TFmode)
10144 if (REG_P (operand))
10146 if (!reload_completed)
10148 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10149 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10151 else if (offsettable_memref_p (operand))
10153 operand = adjust_address (operand, DImode, 0);
10154 parts[0] = operand;
10155 parts[1] = adjust_address (operand, SImode, 8);
10157 else if (GET_CODE (operand) == CONST_DOUBLE)
10162 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10163 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10164 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10165 if (HOST_BITS_PER_WIDE_INT >= 64)
10168 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10169 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10172 parts[0] = immed_double_const (l[0], l[1], DImode);
10173 parts[1] = gen_int_mode (l[2], SImode);
10183 /* Emit insns to perform a move or push of DI, DF, and XF values.
10184 Return false when normal moves are needed; true when all required
10185 insns have been emitted. Operands 2-4 contain the input values
10186 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): excerpt with fused line numbers and missing lines;
   only visible logic is documented.  */
10189 ix86_split_long_move (operands)
10195 int collisions = 0;
10196 enum machine_mode mode = GET_MODE (operands[0]);
10198 /* The DFmode expanders may ask us to move double.
10199 For 64bit target this is single move. By hiding the fact
10200 here we simplify i386.md splitters. */
10201 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10203 /* Optimize constant pool reference to immediates. This is used by
10204 fp moves, that force all constants to memory to allow combining. */
10206 if (GET_CODE (operands[1]) == MEM
10207 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10208 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10209 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10210 if (push_operand (operands[0], VOIDmode))
10212 operands[0] = copy_rtx (operands[0]);
10213 PUT_MODE (operands[0], Pmode);
10216 operands[0] = gen_lowpart (DImode, operands[0]);
10217 operands[1] = gen_lowpart (DImode, operands[1]);
10218 emit_move_insn (operands[0], operands[1]);
10222 /* The only non-offsettable memory we handle is push. */
10223 if (push_operand (operands[0], VOIDmode))
10225 else if (GET_CODE (operands[0]) == MEM
10226 && ! offsettable_memref_p (operands[0]))
10229 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10230 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10232 /* When emitting push, take care for source operands on the stack. */
10233 if (push && GET_CODE (operands[1]) == MEM
10234 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10237 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10238 XEXP (part[1][2], 0));
10239 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10240 XEXP (part[1][1], 0));
10243 /* We need to do copy in the right order in case an address register
10244 of the source overlaps the destination. */
10245 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10247 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10249 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10252 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10255 /* Collision in the middle part can be handled by reordering. */
10256 if (collisions == 1 && nparts == 3
10257 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10260 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10261 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10264 /* If there are more collisions, we can't handle it by reordering.
10265 Do an lea to the last part and use only one colliding move. */
10266 else if (collisions > 1)
10269 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10270 XEXP (part[1][0], 0)));
10271 part[1][0] = change_address (part[1][0],
10272 TARGET_64BIT ? DImode : SImode,
10273 part[0][nparts - 1]);
10274 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10276 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10286 /* We use only first 12 bytes of TFmode value, but for pushing we
10287 are required to adjust stack as if we were pushing real 16byte
10289 if (mode == TFmode && !TARGET_64BIT)
10290 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10292 emit_move_insn (part[0][2], part[1][2]);
10297 /* In 64bit mode we don't have 32bit push available. In case this is
10298 register, it is OK - we will just use larger counterpart. We also
10299 retype memory - these comes from attempt to avoid REX prefix on
10300 moving of second half of TFmode value. */
10301 if (GET_MODE (part[1][1]) == SImode)
10303 if (GET_CODE (part[1][1]) == MEM)
10304 part[1][1] = adjust_address (part[1][1], DImode, 0);
10305 else if (REG_P (part[1][1]))
10306 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10309 if (GET_MODE (part[1][0]) == SImode)
10310 part[1][0] = part[1][1];
10313 emit_move_insn (part[0][1], part[1][1]);
10314 emit_move_insn (part[0][0], part[1][0]);
10318 /* Choose correct order to not overwrite the source before it is copied. */
10319 if ((REG_P (part[0][0])
10320 && REG_P (part[1][1])
10321 && (REGNO (part[0][0]) == REGNO (part[1][1])
10323 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10325 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: copy high part first.  */
10329 operands[2] = part[0][2];
10330 operands[3] = part[0][1];
10331 operands[4] = part[0][0];
10332 operands[5] = part[1][2];
10333 operands[6] = part[1][1];
10334 operands[7] = part[1][0];
10338 operands[2] = part[0][1];
10339 operands[3] = part[0][0];
10340 operands[5] = part[1][1];
10341 operands[6] = part[1][0];
/* Natural order: copy low part first.  */
10348 operands[2] = part[0][0];
10349 operands[3] = part[0][1];
10350 operands[4] = part[0][2];
10351 operands[5] = part[1][0];
10352 operands[6] = part[1][1];
10353 operands[7] = part[1][2];
10357 operands[2] = part[0][0];
10358 operands[3] = part[0][1];
10359 operands[5] = part[1][0];
10360 operands[6] = part[1][1];
10363 emit_move_insn (operands[2], operands[5]);
10364 emit_move_insn (operands[3], operands[6]);
10366 emit_move_insn (operands[4], operands[7]);
/* Split a 64-bit left shift into 32-bit operations (shld + shl), with
   special cases for constant counts.  SCRATCH may hold a zero for the
   cmov-based adjustment of counts >= 32.  NOTE(review): excerpt with
   fused line numbers and missing lines.  */
10372 ix86_split_ashldi (operands, scratch)
10373 rtx *operands, scratch;
10375 rtx low[2], high[2];
10378 if (GET_CODE (operands[2]) == CONST_INT)
10380 split_di (operands, 2, low, high);
10381 count = INTVAL (operands[2]) & 63;
/* Count >= 32: low word moves to high, low becomes zero.  */
10385 emit_move_insn (high[0], low[1]);
10386 emit_move_insn (low[0], const0_rtx);
10389 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Count < 32: shld feeds the high word, shl the low word.  */
10393 if (!rtx_equal_p (operands[0], operands[1]))
10394 emit_move_insn (operands[0], operands[1]);
10395 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10396 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count path.  */
10401 if (!rtx_equal_p (operands[0], operands[1]))
10402 emit_move_insn (operands[0], operands[1]);
10404 split_di (operands, 1, low, high);
10406 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10407 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10409 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10411 if (! no_new_pseudos)
10412 scratch = force_reg (SImode, const0_rtx);
10414 emit_move_insn (scratch, const0_rtx);
10416 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10420 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a 64-bit arithmetic right shift into 32-bit operations
   (shrd + sar), with special cases for constant counts.  SCRATCH may
   hold the sign-extension for the cmov-based adjustment.
   NOTE(review): excerpt with fused line numbers and missing lines.  */
10425 ix86_split_ashrdi (operands, scratch)
10426 rtx *operands, scratch;
10428 rtx low[2], high[2];
10431 if (GET_CODE (operands[2]) == CONST_INT)
10433 split_di (operands, 2, low, high);
10434 count = INTVAL (operands[2]) & 63;
/* Count >= 32: high word moves to low; high becomes sign bits.  */
10438 emit_move_insn (low[0], high[1]);
10440 if (! reload_completed)
10441 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10444 emit_move_insn (high[0], low[0]);
10445 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10449 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd feeds the low word, sar the high word.  */
10453 if (!rtx_equal_p (operands[0], operands[1]))
10454 emit_move_insn (operands[0], operands[1]);
10455 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10456 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
10461 if (!rtx_equal_p (operands[0], operands[1]))
10462 emit_move_insn (operands[0], operands[1]);
10464 split_di (operands, 1, low, high);
10466 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10467 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10469 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10471 if (! no_new_pseudos)
10472 scratch = gen_reg_rtx (SImode);
10473 emit_move_insn (scratch, high[0]);
10474 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10475 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10479 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a 64-bit logical right shift into 32-bit operations
   (shrd + shr), with special cases for constant counts.
   NOTE(review): excerpt with fused line numbers and missing lines.  */
10484 ix86_split_lshrdi (operands, scratch)
10485 rtx *operands, scratch;
10487 rtx low[2], high[2];
10490 if (GET_CODE (operands[2]) == CONST_INT)
10492 split_di (operands, 2, low, high);
10493 count = INTVAL (operands[2]) & 63;
/* Count >= 32: high word moves to low, high becomes zero.  */
10497 emit_move_insn (low[0], high[1]);
10498 emit_move_insn (high[0], const0_rtx);
10501 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd feeds the low word, shr the high word.  */
10505 if (!rtx_equal_p (operands[0], operands[1]))
10506 emit_move_insn (operands[0], operands[1]);
10507 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10508 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
10513 if (!rtx_equal_p (operands[0], operands[1]))
10514 emit_move_insn (operands[0], operands[1]);
10516 split_di (operands, 1, low, high);
10518 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10519 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10521 /* Heh. By reversing the arguments, we can reuse this pattern. */
10522 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10524 if (! no_new_pseudos)
10525 scratch = force_reg (SImode, const0_rtx);
10527 emit_move_insn (scratch, const0_rtx);
10529 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10533 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10537 /* Helper function for the string operations below. Dest VARIABLE whether
10538 it is aligned to VALUE bytes. If true, jump to the label. */
/* NOTE(review): excerpt with fused line numbers; presumably returns the
   generated label so the caller can emit it after the unaligned-fixup
   code -- TODO confirm against the full source.  */
10540 ix86_expand_aligntest (variable, value)
10544 rtx label = gen_label_rtx ();
10545 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Test the low bits with AND in the variable's own mode.  */
10546 if (GET_MODE (variable) == DImode)
10547 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10549 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10550 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10555 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG by emitting an add of -VALUE in the
   counter's mode.  NOTE(review): excerpt with fused line numbers.  */
10557 ix86_adjust_counter (countreg, value)
10559 HOST_WIDE_INT value;
10561 if (GET_MODE (countreg) == DImode)
10562 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10564 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10567 /* Zero extend possibly SImode EXP to Pmode register. */
/* VOIDmode (constants) are forced directly; values already in Pmode are
   copied; otherwise an SImode value is zero-extended into a fresh Pmode
   register.  NOTE(review): excerpt with fused line numbers.  */
10569 ix86_zero_extend_to_Pmode (exp)
10573 if (GET_MODE (exp) == VOIDmode)
10574 return force_reg (Pmode, exp);
10575 if (GET_MODE (exp) == Pmode)
10576 return copy_to_mode_reg (Pmode, exp);
10577 r = gen_reg_rtx (Pmode);
10578 emit_insn (gen_zero_extendsidi2 (r, exp));
10582 /* Expand string move (memcpy) operation. Use i386 string operations when
10583 profitable. expand_clrstr contains similar code. */
10585 ix86_expand_movstr (dst, src, count_exp, align_exp)
10586 rtx dst, src, count_exp, align_exp;
10588 rtx srcreg, destreg, countreg;
10589 enum machine_mode counter_mode;
10590 HOST_WIDE_INT align = 0;
10591 unsigned HOST_WIDE_INT count = 0;
10596 if (GET_CODE (align_exp) == CONST_INT)
10597 align = INTVAL (align_exp);
10599 /* This simple hack avoids all inlining code and simplifies code below. */
10600 if (!TARGET_ALIGN_STRINGOPS)
10603 if (GET_CODE (count_exp) == CONST_INT)
10604 count = INTVAL (count_exp);
10606 /* Figure out proper mode for counter. For 32bits it is always SImode,
10607 for 64bits use SImode when possible, otherwise DImode.
10608 Set count to number of bytes copied when known at compile time. */
10609 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10610 || x86_64_zero_extended_value (count_exp))
10611 counter_mode = SImode;
10613 counter_mode = DImode;
10615 if (counter_mode != SImode && counter_mode != DImode)
10618 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10619 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10621 emit_insn (gen_cld ());
10623 /* When optimizing for size emit simple rep ; movsb instruction for
10624 counts not divisible by 4. */
10626 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10628 countreg = ix86_zero_extend_to_Pmode (count_exp);
10630 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10631 destreg, srcreg, countreg));
10633 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10634 destreg, srcreg, countreg));
10637 /* For constant aligned (or small unaligned) copies use rep movsl
10638 followed by code copying the rest. For PentiumPro ensure 8 byte
10639 alignment to allow rep movsl acceleration. */
10641 else if (count != 0
10643 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10644 || optimize_size || count < (unsigned int) 64))
10646 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10647 if (count & ~(size - 1))
10649 countreg = copy_to_mode_reg (counter_mode,
10650 GEN_INT ((count >> (size == 4 ? 2 : 3))
10651 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10652 countreg = ix86_zero_extend_to_Pmode (countreg);
10656 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10657 destreg, srcreg, countreg));
10659 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10660 destreg, srcreg, countreg));
10663 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10664 destreg, srcreg, countreg));
10666 if (size == 8 && (count & 0x04))
10667 emit_insn (gen_strmovsi (destreg, srcreg));
10669 emit_insn (gen_strmovhi (destreg, srcreg));
10671 emit_insn (gen_strmovqi (destreg, srcreg));
10673 /* The generic code based on the glibc implementation:
10674 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10675 allowing accelerated copying there)
10676 - copy the data using rep movsl
10677 - copy the rest. */
10682 int desired_alignment = (TARGET_PENTIUMPRO
10683 && (count == 0 || count >= (unsigned int) 260)
10684 ? 8 : UNITS_PER_WORD);
10686 /* In case we don't know anything about the alignment, default to
10687 library version, since it is usually equally fast and result in
10690 Also emit call when we know that the count is large and call overhead
10691 will not be important. */
10692 if (!TARGET_INLINE_ALL_STRINGOPS
10693 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10699 if (TARGET_SINGLE_STRINGOP)
10700 emit_insn (gen_cld ());
10702 countreg2 = gen_reg_rtx (Pmode);
10703 countreg = copy_to_mode_reg (counter_mode, count_exp);
10705 /* We don't use loops to align destination and to copy parts smaller
10706 than 4 bytes, because gcc is able to optimize such code better (in
10707 the case the destination or the count really is aligned, gcc is often
10708 able to predict the branches) and also it is friendlier to the
10709 hardware branch prediction.
10711 Using loops is beneficial for generic case, because we can
10712 handle small counts using the loops. Many CPUs (such as Athlon)
10713 have large REP prefix setup costs.
10715 This is quite costy. Maybe we can revisit this decision later or
10716 add some customizability to this code. */
10718 if (count == 0 && align < desired_alignment)
10720 label = gen_label_rtx ();
10721 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10722 LEU, 0, counter_mode, 1, label);
10726 rtx label = ix86_expand_aligntest (destreg, 1);
10727 emit_insn (gen_strmovqi (destreg, srcreg));
10728 ix86_adjust_counter (countreg, 1);
10729 emit_label (label);
10730 LABEL_NUSES (label) = 1;
10734 rtx label = ix86_expand_aligntest (destreg, 2);
10735 emit_insn (gen_strmovhi (destreg, srcreg));
10736 ix86_adjust_counter (countreg, 2);
10737 emit_label (label);
10738 LABEL_NUSES (label) = 1;
10740 if (align <= 4 && desired_alignment > 4)
10742 rtx label = ix86_expand_aligntest (destreg, 4);
10743 emit_insn (gen_strmovsi (destreg, srcreg));
10744 ix86_adjust_counter (countreg, 4);
10745 emit_label (label);
10746 LABEL_NUSES (label) = 1;
10749 if (label && desired_alignment > 4 && !TARGET_64BIT)
10751 emit_label (label);
10752 LABEL_NUSES (label) = 1;
10755 if (!TARGET_SINGLE_STRINGOP)
10756 emit_insn (gen_cld ());
10759 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10761 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10762 destreg, srcreg, countreg2));
10766 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10767 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10768 destreg, srcreg, countreg2));
10773 emit_label (label);
10774 LABEL_NUSES (label) = 1;
10776 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10777 emit_insn (gen_strmovsi (destreg, srcreg));
10778 if ((align <= 4 || count == 0) && TARGET_64BIT)
10780 rtx label = ix86_expand_aligntest (countreg, 4);
10781 emit_insn (gen_strmovsi (destreg, srcreg));
10782 emit_label (label);
10783 LABEL_NUSES (label) = 1;
10785 if (align > 2 && count != 0 && (count & 2))
10786 emit_insn (gen_strmovhi (destreg, srcreg));
10787 if (align <= 2 || count == 0)
10789 rtx label = ix86_expand_aligntest (countreg, 2);
10790 emit_insn (gen_strmovhi (destreg, srcreg));
10791 emit_label (label);
10792 LABEL_NUSES (label) = 1;
10794 if (align > 1 && count != 0 && (count & 1))
10795 emit_insn (gen_strmovqi (destreg, srcreg));
10796 if (align <= 1 || count == 0)
10798 rtx label = ix86_expand_aligntest (countreg, 1);
10799 emit_insn (gen_strmovqi (destreg, srcreg));
10800 emit_label (label);
10801 LABEL_NUSES (label) = 1;
10805 insns = get_insns ();
10808 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10813 /* Expand string clear operation (bzero). Use i386 string operations when
10814 profitable. expand_movstr contains similar code. */
/* NOTE(review): this excerpt has lines elided (the embedded numbering skips);
   code below is kept byte-for-byte as found.  Emits RTL that clears COUNT_EXP
   bytes at SRC using rep stos / strset patterns, choosing a strategy from the
   compile-time count and alignment.  */
10816 ix86_expand_clrstr (src, count_exp, align_exp)
10817 rtx src, count_exp, align_exp;
10819 rtx destreg, zeroreg, countreg;
10820 enum machine_mode counter_mode;
10821 HOST_WIDE_INT align = 0;
10822 unsigned HOST_WIDE_INT count = 0;
/* Pick up compile-time alignment when the align operand is a constant.  */
10824 if (GET_CODE (align_exp) == CONST_INT)
10825 align = INTVAL (align_exp);
10827 /* This simple hack avoids all inlining code and simplifies code below. */
10828 if (!TARGET_ALIGN_STRINGOPS)
10831 if (GET_CODE (count_exp) == CONST_INT)
10832 count = INTVAL (count_exp);
10833 /* Figure out proper mode for counter. For 32bits it is always SImode,
10834 for 64bits use SImode when possible, otherwise DImode.
10835 Set count to number of bytes copied when known at compile time. */
10836 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10837 || x86_64_zero_extended_value (count_exp))
10838 counter_mode = SImode;
10840 counter_mode = DImode;
10842 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10844 emit_insn (gen_cld ());
10846 /* When optimizing for size emit simple rep ; movsb instruction for
10847 counts not divisible by 4. */
10849 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10851 countreg = ix86_zero_extend_to_Pmode (count_exp);
10852 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10854 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10855 destreg, countreg));
10857 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10858 destreg, countreg));
10860 else if (count != 0
10862 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10863 || optimize_size || count < (unsigned int) 64))
/* Aligned / small-constant path: clear in word-sized chunks, then mop up
   the 1-7 byte tail with individual strset insns.  */
10865 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10866 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10867 if (count & ~(size - 1))
10869 countreg = copy_to_mode_reg (counter_mode,
10870 GEN_INT ((count >> (size == 4 ? 2 : 3))
10871 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10872 countreg = ix86_zero_extend_to_Pmode (countreg);
10876 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10877 destreg, countreg));
10879 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10880 destreg, countreg));
10883 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10884 destreg, countreg));
10886 if (size == 8 && (count & 0x04))
10887 emit_insn (gen_strsetsi (destreg,
10888 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10890 emit_insn (gen_strsethi (destreg,
10891 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10893 emit_insn (gen_strsetqi (destreg,
10894 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10900 /* Compute desired alignment of the string operation. */
10901 int desired_alignment = (TARGET_PENTIUMPRO
10902 && (count == 0 || count >= (unsigned int) 260)
10903 ? 8 : UNITS_PER_WORD);
10905 /* In case we don't know anything about the alignment, default to
10906 library version, since it is usually equally fast and result in
10909 Also emit call when we know that the count is large and call overhead
10910 will not be important. */
10911 if (!TARGET_INLINE_ALL_STRINGOPS
10912 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10915 if (TARGET_SINGLE_STRINGOP)
10916 emit_insn (gen_cld ());
10918 countreg2 = gen_reg_rtx (Pmode);
10919 countreg = copy_to_mode_reg (counter_mode, count_exp);
10920 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* Runtime count: branch around the alignment prologue for small counts.  */
10922 if (count == 0 && align < desired_alignment)
10924 label = gen_label_rtx ();
10925 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10926 LEU, 0, counter_mode, 1, label);
10930 rtx label = ix86_expand_aligntest (destreg, 1);
10931 emit_insn (gen_strsetqi (destreg,
10932 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10933 ix86_adjust_counter (countreg, 1);
10934 emit_label (label);
10935 LABEL_NUSES (label) = 1;
10939 rtx label = ix86_expand_aligntest (destreg, 2);
10940 emit_insn (gen_strsethi (destreg,
10941 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10942 ix86_adjust_counter (countreg, 2);
10943 emit_label (label);
10944 LABEL_NUSES (label) = 1;
10946 if (align <= 4 && desired_alignment > 4)
10948 rtx label = ix86_expand_aligntest (destreg, 4);
10949 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10950 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10952 ix86_adjust_counter (countreg, 4);
10953 emit_label (label);
10954 LABEL_NUSES (label) = 1;
10957 if (label && desired_alignment > 4 && !TARGET_64BIT)
10959 emit_label (label);
10960 LABEL_NUSES (label) = 1;
10964 if (!TARGET_SINGLE_STRINGOP)
10965 emit_insn (gen_cld ());
/* Main body: rep stos in word-sized chunks (byte count shifted right).  */
10968 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10970 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10971 destreg, countreg2));
10975 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10976 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10977 destreg, countreg2));
10981 emit_label (label);
10982 LABEL_NUSES (label) = 1;
/* Epilogue: clear any remaining 1-7 bytes, testing the counter at runtime
   when the count is not known at compile time.  */
10985 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10986 emit_insn (gen_strsetsi (destreg,
10987 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10988 if (TARGET_64BIT && (align <= 4 || count == 0))
10990 rtx label = ix86_expand_aligntest (countreg, 4);
10991 emit_insn (gen_strsetsi (destreg,
10992 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10993 emit_label (label);
10994 LABEL_NUSES (label) = 1;
10996 if (align > 2 && count != 0 && (count & 2))
10997 emit_insn (gen_strsethi (destreg,
10998 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10999 if (align <= 2 || count == 0)
11001 rtx label = ix86_expand_aligntest (countreg, 2);
11002 emit_insn (gen_strsethi (destreg,
11003 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11004 emit_label (label);
11005 LABEL_NUSES (label) = 1;
11007 if (align > 1 && count != 0 && (count & 1))
11008 emit_insn (gen_strsetqi (destreg,
11009 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11010 if (align <= 1 || count == 0)
11012 rtx label = ix86_expand_aligntest (countreg, 1);
11013 emit_insn (gen_strsetqi (destreg,
11014 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11015 emit_label (label);
11016 LABEL_NUSES (label) = 1;
11021 /* Expand strlen. */
/* Emits RTL computing the length of the string at SRC into OUT.  Uses the
   unrolled SImode scanner when profitable, otherwise the repnz-scasb-style
   strlenqi patterns.  (Reviewer note: lines are elided in this excerpt;
   the return statements and some conditionals are not visible.)  */
11023 ix86_expand_strlen (out, src, eoschar, align)
11024 rtx out, src, eoschar, align;
11026 rtx addr, scratch1, scratch2, scratch3, scratch4;
11028 /* The generic case of strlen expander is long. Avoid it's
11029 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11031 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11032 && !TARGET_INLINE_ALL_STRINGOPS
11034 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11037 addr = force_reg (Pmode, XEXP (src, 0));
11038 scratch1 = gen_reg_rtx (Pmode);
11040 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11043 /* Well it seems that some optimizer does not combine a call like
11044 foo(strlen(bar), strlen(bar));
11045 when the move and the subtraction is done here. It does calculate
11046 the length just once when these instructions are done inside of
11047 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11048 often used and I use one fewer register for the lifetime of
11049 output_strlen_unroll() this is better. */
11051 emit_move_insn (out, addr);
11053 ix86_expand_strlensi_unroll_1 (out, align);
11055 /* strlensi_unroll_1 returns the address of the zero at the end of
11056 the string, like memchr(), so compute the length by subtracting
11057 the start address. */
11059 emit_insn (gen_subdi3 (out, out, addr));
11061 emit_insn (gen_subsi3 (out, out, addr));
/* Fallback: scan with the strlenqi pattern, then length = ~scratch - 1.  */
11065 scratch2 = gen_reg_rtx (Pmode);
11066 scratch3 = gen_reg_rtx (Pmode);
11067 scratch4 = force_reg (Pmode, constm1_rtx);
11069 emit_move_insn (scratch3, addr);
11070 eoschar = force_reg (QImode, eoschar);
11072 emit_insn (gen_cld ());
11075 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11076 align, scratch4, scratch3));
11077 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11078 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11082 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11083 align, scratch4, scratch3));
11084 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11085 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11091 /* Expand the appropriate insns for doing strlen if not just doing
11094 out = result, initialized with the start address
11095 align_rtx = alignment of the address.
11096 scratch = scratch register, initialized with the startaddress when
11097 not aligned, otherwise undefined
11099 This is just the body. It needs the initialisations mentioned above and
11100 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): lines are elided in this excerpt; code kept byte-for-byte.
   On return OUT holds the address of the terminating zero (memchr-style);
   the caller subtracts the start address to get the length.  */
11103 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11104 rtx out, align_rtx;
11108 rtx align_2_label = NULL_RTX;
11109 rtx align_3_label = NULL_RTX;
11110 rtx align_4_label = gen_label_rtx ();
11111 rtx end_0_label = gen_label_rtx ();
11113 rtx tmpreg = gen_reg_rtx (SImode);
11114 rtx scratch = gen_reg_rtx (SImode);
11117 if (GET_CODE (align_rtx) == CONST_INT)
11118 align = INTVAL (align_rtx);
11120 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11122 /* Is there a known alignment and is it less than 4? */
11125 rtx scratch1 = gen_reg_rtx (Pmode);
11126 emit_move_insn (scratch1, out);
11127 /* Is there a known alignment and is it not 2? */
11130 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11131 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11133 /* Leave just the 3 lower bits. */
11134 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11135 NULL_RTX, 0, OPTAB_WIDEN);
11137 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11138 Pmode, 1, align_4_label);
11139 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11140 Pmode, 1, align_2_label);
11141 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11142 Pmode, 1, align_3_label);
11146 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11147 check if is aligned to 4 - byte. */
11149 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11150 NULL_RTX, 0, OPTAB_WIDEN);
11152 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11153 Pmode, 1, align_4_label);
11156 mem = gen_rtx_MEM (QImode, out);
11158 /* Now compare the bytes. */
11160 /* Compare the first n unaligned byte on a byte per byte basis. */
11161 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11162 QImode, 1, end_0_label);
11164 /* Increment the address. */
11166 emit_insn (gen_adddi3 (out, out, const1_rtx));
11168 emit_insn (gen_addsi3 (out, out, const1_rtx));
11170 /* Not needed with an alignment of 2 */
11173 emit_label (align_2_label);
11175 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11179 emit_insn (gen_adddi3 (out, out, const1_rtx));
11181 emit_insn (gen_addsi3 (out, out, const1_rtx));
11183 emit_label (align_3_label);
11186 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11190 emit_insn (gen_adddi3 (out, out, const1_rtx));
11192 emit_insn (gen_addsi3 (out, out, const1_rtx));
11195 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11196 align this loop. It gives only huge programs, but does not help to
11198 emit_label (align_4_label);
11200 mem = gen_rtx_MEM (SImode, out);
11201 emit_move_insn (scratch, mem);
11203 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11205 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11207 /* This formula yields a nonzero result iff one of the bytes is zero.
11208 This saves three branches inside loop and many cycles. */
11210 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11211 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11212 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11213 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11214 gen_int_mode (0x80808080, SImode)));
11215 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found; locate it branchlessly via cmov.  */
11220 rtx reg = gen_reg_rtx (SImode);
11221 rtx reg2 = gen_reg_rtx (Pmode);
11222 emit_move_insn (reg, tmpreg);
11223 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11225 /* If zero is not in the first two bytes, move two bytes forward. */
11226 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11227 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11228 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11229 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11230 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11233 /* Emit lea manually to avoid clobbering of flags. */
11234 emit_insn (gen_rtx_SET (SImode, reg2,
11235 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11237 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11238 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11239 emit_insn (gen_rtx_SET (VOIDmode, out,
11240 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11247 rtx end_2_label = gen_label_rtx ();
11248 /* Is zero in the first two bytes? */
11250 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11251 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11252 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11253 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11254 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11256 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11257 JUMP_LABEL (tmp) = end_2_label;
11259 /* Not in the first two. Move two bytes forward. */
11260 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11262 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11264 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11266 emit_label (end_2_label);
11270 /* Avoid branch in fixing the byte. */
11271 tmpreg = gen_lowpart (QImode, tmpreg);
11272 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11274 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11276 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11278 emit_label (end_0_label);
/* Emit a call to FNADDR returning RETVAL.  POP is the number of bytes the
   callee pops (wrapped into a PARALLEL with the call); CALLARG2 carries the
   SSE register count for 64-bit varargs (loaded into %al).  SIBCALL selects
   tail-call constraints.  (Reviewer note: lines are elided in this excerpt.)  */
11282 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11283 rtx retval, fnaddr, callarg1, callarg2, pop;
11286 rtx use = NULL, call;
11288 if (pop == const0_rtx)
11290 if (TARGET_64BIT && pop)
11294 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11295 fnaddr = machopic_indirect_call_target (fnaddr);
11297 /* Static functions and indirect calls don't need the pic register. */
11298 if (! TARGET_64BIT && flag_pic
11299 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11300 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11301 use_reg (&use, pic_offset_table_rtx)
11303 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11305 rtx al = gen_rtx_REG (QImode, 0);
11306 emit_move_insn (al, callarg2);
11307 use_reg (&use, al);
11309 #endif /* TARGET_MACHO */
11311 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11313 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11314 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11316 if (sibcall && TARGET_64BIT
11317 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11320 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11321 fnaddr = gen_rtx_REG (Pmode, 40);
11322 emit_move_insn (fnaddr, addr);
11323 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11326 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11328 call = gen_rtx_SET (VOIDmode, retval, call);
11331 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11332 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11333 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11336 call = emit_call_insn (call);
11338 CALL_INSN_FUNCTION_USAGE (call) = use;
11342 /* Clear stack slot assignments remembered from previous functions.
11343 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Returns a freshly zeroed, GC-allocated machine_function.  */
11346 static struct machine_function *
11347 ix86_init_machine_status ()
11349 return ggc_alloc_cleared (sizeof (struct machine_function));
11352 /* Return a MEM corresponding to a stack slot with mode MODE.
11353 Allocate a new slot if necessary.
11355 The RTL for a function can have several slots available: N is
11356 which slot to use. */
11359 assign_386_stack_local (mode, n)
11360 enum machine_mode mode;
/* N must be in [0, MAX_386_STACK_LOCALS); slots are cached per mode so the
   same (mode, n) pair always yields the same stack MEM.  */
11363 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11366 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11367 ix86_stack_locals[(int) mode][n]
11368 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11370 return ix86_stack_locals[(int) mode][n];
11373 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11375 static GTY(()) rtx ix86_tls_symbol;
/* Lazily builds and caches the SYMBOL_REF; 32-bit GNU TLS uses the
   ___tls_get_addr spelling, otherwise __tls_get_addr.  */
11377 ix86_tls_get_addr ()
11380 if (!ix86_tls_symbol)
11382 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11383 (TARGET_GNU_TLS && !TARGET_64BIT)
11384 ? "___tls_get_addr"
11385 : "__tls_get_addr");
11388 return ix86_tls_symbol;
11391 /* Calculate the length of the memory address in the instruction
11392 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): several lines (length accumulation, returns) are elided in
   this excerpt; code kept byte-for-byte as found.  */
11395 memory_address_length (addr)
11398 struct ix86_address parts;
11399 rtx base, index, disp;
11402 if (GET_CODE (addr) == PRE_DEC
11403 || GET_CODE (addr) == POST_INC
11404 || GET_CODE (addr) == PRE_MODIFY
11405 || GET_CODE (addr) == POST_MODIFY)
11408 if (! ix86_decompose_address (addr, &parts))
11412 index = parts.index;
11416 /* Register Indirect. */
11417 if (base && !index && !disp)
11419 /* Special cases: ebp and esp need the two-byte modrm form. */
11420 if (addr == stack_pointer_rtx
11421 || addr == arg_pointer_rtx
11422 || addr == frame_pointer_rtx
11423 || addr == hard_frame_pointer_rtx)
11427 /* Direct Addressing. */
11428 else if (disp && !base && !index)
11433 /* Find the length of the displacement constant. */
11436 if (GET_CODE (disp) == CONST_INT
11437 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11443 /* An index requires the two-byte modrm form. */
11451 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11452 is set, expect that insn have 8bit immediate alternative. */
/* Scans INSN's constant operands; 'K'-class (8-bit) immediates count as one
   byte under SHORTFORM, otherwise size follows the insn mode.  (Reviewer
   note: lines elided in this excerpt.)  */
11454 ix86_attr_length_immediate_default (insn, shortform)
11460 extract_insn_cached (insn);
11461 for (i = recog_data.n_operands - 1; i >= 0; --i)
11462 if (CONSTANT_P (recog_data.operand[i]))
11467 && GET_CODE (recog_data.operand[i]) == CONST_INT
11468 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11472 switch (get_attr_mode (insn))
11483 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11488 fatal_insn ("unknown insn mode", insn);
11494 /* Compute default value for "length_address" attribute. */
/* Returns the encoded length of the first MEM operand's address, if any.  */
11496 ix86_attr_length_address_default (insn)
11500 extract_insn_cached (insn);
11501 for (i = recog_data.n_operands - 1; i >= 0; --i)
11502 if (GET_CODE (recog_data.operand[i]) == MEM)
11504 return memory_address_length (XEXP (recog_data.operand[i], 0));
11510 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): the switch body and per-CPU return values are elided in
   this excerpt; only the case labels survive.  */
11517 case PROCESSOR_PENTIUM:
11521 case PROCESSOR_PENTIUMPRO:
11522 case PROCESSOR_PENTIUM4:
11523 case PROCESSOR_ATHLON:
11532 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11533 by DEP_INSN and nothing set by DEP_INSN. */
11536 ix86_flags_dependant (insn, dep_insn, insn_type)
11537 rtx insn, dep_insn;
11538 enum attr_type insn_type;
11542 /* Simplify the test for uninteresting insns. */
11543 if (insn_type != TYPE_SETCC
11544 && insn_type != TYPE_ICMOV
11545 && insn_type != TYPE_FCMOV
11546 && insn_type != TYPE_IBR)
11549 if ((set = single_set (dep_insn)) != 0)
11551 set = SET_DEST (set);
11554 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11555 && XVECLEN (PATTERN (dep_insn), 0) == 2
11556 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11557 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11559 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11560 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11565 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11568 /* This test is true if the dependent insn reads the flags but
11569 not any other potentially set register. */
11570 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11573 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11579 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11580 address with operands set by DEP_INSN. */
/* For lea the address is the SET_SRC itself; otherwise the first MEM
   operand's address is examined.  (Reviewer note: lines elided in this
   excerpt.)  */
11583 ix86_agi_dependant (insn, dep_insn, insn_type)
11584 rtx insn, dep_insn;
11585 enum attr_type insn_type;
11589 if (insn_type == TYPE_LEA
11592 addr = PATTERN (insn);
11593 if (GET_CODE (addr) == SET)
11595 else if (GET_CODE (addr) == PARALLEL
11596 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11597 addr = XVECEXP (addr, 0, 0);
11600 addr = SET_SRC (addr);
11605 extract_insn_cached (insn);
11606 for (i = recog_data.n_operands - 1; i >= 0; --i)
11607 if (GET_CODE (recog_data.operand[i]) == MEM)
11609 addr = XEXP (recog_data.operand[i], 0);
/* True iff DEP_INSN writes something the address expression reads.  */
11616 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependency LINK between
   DEP_INSN (producer) and INSN (consumer) for the selected CPU.
   (Reviewer note: lines are elided in this excerpt; code kept byte-for-byte.)  */
11620 ix86_adjust_cost (insn, link, dep_insn, cost)
11621 rtx insn, link, dep_insn;
11624 enum attr_type insn_type, dep_insn_type;
11625 enum attr_memory memory, dep_memory;
11627 int dep_insn_code_number;
11629 /* Anti and output dependencies have zero cost on all CPUs. */
11630 if (REG_NOTE_KIND (link) != 0)
11633 dep_insn_code_number = recog_memoized (dep_insn);
11635 /* If we can't recognize the insns, we can't really do anything. */
11636 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11639 insn_type = get_attr_type (insn);
11640 dep_insn_type = get_attr_type (dep_insn);
11644 case PROCESSOR_PENTIUM:
11645 /* Address Generation Interlock adds a cycle of latency. */
11646 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11649 /* ??? Compares pair with jump/setcc. */
11650 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11653 /* Floating point stores require value to be ready one cycle earlier. */
11654 if (insn_type == TYPE_FMOV
11655 && get_attr_memory (insn) == MEMORY_STORE
11656 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11660 case PROCESSOR_PENTIUMPRO:
11661 memory = get_attr_memory (insn);
11662 dep_memory = get_attr_memory (dep_insn);
11664 /* Since we can't represent delayed latencies of load+operation,
11665 increase the cost here for non-imov insns. */
11666 if (dep_insn_type != TYPE_IMOV
11667 && dep_insn_type != TYPE_FMOV
11668 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11671 /* INT->FP conversion is expensive. */
11672 if (get_attr_fp_int_src (dep_insn))
11675 /* There is one cycle extra latency between an FP op and a store. */
11676 if (insn_type == TYPE_FMOV
11677 && (set = single_set (dep_insn)) != NULL_RTX
11678 && (set2 = single_set (insn)) != NULL_RTX
11679 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11680 && GET_CODE (SET_DEST (set2)) == MEM)
11683 /* Show ability of reorder buffer to hide latency of load by executing
11684 in parallel with previous instruction in case
11685 previous instruction is not needed to compute the address. */
11686 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11687 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11689 /* Claim moves to take one cycle, as core can issue one load
11690 at time and the next load can start cycle later. */
11691 if (dep_insn_type == TYPE_IMOV
11692 || dep_insn_type == TYPE_FMOV)
11700 memory = get_attr_memory (insn);
11701 dep_memory = get_attr_memory (dep_insn);
11702 /* The esp dependency is resolved before the instruction is really
11704 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11705 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11708 /* Since we can't represent delayed latencies of load+operation,
11709 increase the cost here for non-imov insns. */
11710 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11711 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11713 /* INT->FP conversion is expensive. */
11714 if (get_attr_fp_int_src (dep_insn))
11717 /* Show ability of reorder buffer to hide latency of load by executing
11718 in parallel with previous instruction in case
11719 previous instruction is not needed to compute the address. */
11720 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11721 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11723 /* Claim moves to take one cycle, as core can issue one load
11724 at time and the next load can start cycle later. */
11725 if (dep_insn_type == TYPE_IMOV
11726 || dep_insn_type == TYPE_FMOV)
11735 case PROCESSOR_ATHLON:
11737 memory = get_attr_memory (insn);
11738 dep_memory = get_attr_memory (dep_insn);
11740 /* Show ability of reorder buffer to hide latency of load by executing
11741 in parallel with previous instruction in case
11742 previous instruction is not needed to compute the address. */
11743 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11744 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11746 /* Claim moves to take one cycle, as core can issue one load
11747 at time and the next load can start cycle later. */
11748 if (dep_insn_type == TYPE_IMOV
11749 || dep_insn_type == TYPE_FMOV)
11751 else if (cost >= 3)
/* Per-cycle PPro scheduling state (decode slot contents and issue count);
   field list partially elided in this excerpt.  */
11766 struct ppro_sched_data
11769 int issued_this_cycle;
/* Return INSN's ppro_uops attribute, treating unrecognizable insns as
   "many uops" (the conservative choice for the decoders).  */
11773 static enum attr_ppro_uops
11774 ix86_safe_ppro_uops (insn)
11777 if (recog_memoized (insn) >= 0)
11778 return get_attr_ppro_uops (insn);
11780 return PPRO_UOPS_MANY;
/* Debug aid: print the UIDs of the insns currently occupying the three
   PPro decode slots to DUMP, if slot 0 is filled.  */
11784 ix86_dump_ppro_packet (dump)
11787 if (ix86_sched_data.ppro.decode[0])
11789 fprintf (dump, "PPRO packet: %d",
11790 INSN_UID (ix86_sched_data.ppro.decode[0]));
11791 if (ix86_sched_data.ppro.decode[1])
11792 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11793 if (ix86_sched_data.ppro.decode[2])
11794 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11795 fputc ('\n', dump);
11799 /* We're beginning a new block. Initialize data structures as necessary. */
11802 ix86_sched_init (dump, sched_verbose, veclen)
11803 FILE *dump ATTRIBUTE_UNUSED;
11804 int sched_verbose ATTRIBUTE_UNUSED;
11805 int veclen ATTRIBUTE_UNUSED;
11807 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11810 /* Shift INSN to SLOT, and shift everything else down. */
/* (Reviewer note: declaration and save/restore of the moved insn are elided
   in this excerpt; only the shifting loop survives.)  */
11813 ix86_reorder_insn (insnp, slot)
11820 insnp[0] = insnp[1];
11821 while (++insnp != slot);
/* Reorder the ready queue for the PPro 4-1-1 decoder template: issue one
   complex (or FEW-uop) insn into slot 0, then fill slots 1-2 with 1-uop
   insns.  (Reviewer note: lines elided in this excerpt.)  */
11827 ix86_sched_reorder_ppro (ready, e_ready)
11832 enum attr_ppro_uops cur_uops;
11833 int issued_this_cycle;
11837 /* At this point .ppro.decode contains the state of the three
11838 decoders from last "cycle". That is, those insns that were
11839 actually independent. But here we're scheduling for the
11840 decoder, and we may find things that are decodable in the
11843 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11844 issued_this_cycle = 0;
11847 cur_uops = ix86_safe_ppro_uops (*insnp);
11849 /* If the decoders are empty, and we've a complex insn at the
11850 head of the priority queue, let it issue without complaint. */
11851 if (decode[0] == NULL)
11853 if (cur_uops == PPRO_UOPS_MANY)
11855 decode[0] = *insnp;
11859 /* Otherwise, search for a 2-4 uop unsn to issue. */
11860 while (cur_uops != PPRO_UOPS_FEW)
11862 if (insnp == ready)
11864 cur_uops = ix86_safe_ppro_uops (*--insnp);
11867 /* If so, move it to the head of the line. */
11868 if (cur_uops == PPRO_UOPS_FEW)
11869 ix86_reorder_insn (insnp, e_ready);
11871 /* Issue the head of the queue. */
11872 issued_this_cycle = 1;
11873 decode[0] = *e_ready--;
11876 /* Look for simple insns to fill in the other two slots. */
11877 for (i = 1; i < 3; ++i)
11878 if (decode[i] == NULL)
11880 if (ready > e_ready)
11884 cur_uops = ix86_safe_ppro_uops (*insnp);
11885 while (cur_uops != PPRO_UOPS_ONE)
11887 if (insnp == ready)
11889 cur_uops = ix86_safe_ppro_uops (*--insnp);
11892 /* Found one. Move it to the head of the queue and issue it. */
11893 if (cur_uops == PPRO_UOPS_ONE)
11895 ix86_reorder_insn (insnp, e_ready);
11896 decode[i] = *e_ready--;
11897 issued_this_cycle++;
11901 /* ??? Didn't find one. Ideally, here we would do a lazy split
11902 of 2-uop insns, issue one and queue the other. */
11906 if (issued_this_cycle == 0)
11907 issued_this_cycle = 1;
11908 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11911 /* We are about to being issuing insns for this clock cycle.
11912 Override the default sort algorithm to better slot instructions. */
/* Scheduler hook: dispatches to the PPro-specific reorder when scheduling
   for that CPU; returns the issue rate.  (Reviewer note: lines elided.)  */
11914 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11915 FILE *dump ATTRIBUTE_UNUSED;
11916 int sched_verbose ATTRIBUTE_UNUSED;
11919 int clock_var ATTRIBUTE_UNUSED;
11921 int n_ready = *n_readyp;
11922 rtx *e_ready = ready + n_ready - 1;
11924 /* Make sure to go ahead and initialize key items in
11925 ix86_sched_data if we are not going to bother trying to
11926 reorder the ready queue. */
11929 ix86_sched_data.ppro.issued_this_cycle = 1;
11938 case PROCESSOR_PENTIUMPRO:
11939 ix86_sched_reorder_ppro (ready, e_ready);
11944 return ix86_issue_rate ();
/* Target hook (TARGET_SCHED_VARIABLE_ISSUE): bookkeeping performed
   after INSN is issued; returns how many more insns may still be
   issued this cycle.  For the PentiumPro it also maintains a simulated
   three-slot decoder packet in ix86_sched_data.ppro.decode[].
   NOTE(review): elided listing -- the parameter declarations, the
   switch over ix86_cpu, the default case and several braces are
   missing between the numbered lines.  */
11947 /* We are about to issue INSN. Return the number of insns left on the
11948 ready queue that can be issued this cycle. */
11951 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11955 int can_issue_more;
11961 return can_issue_more - 1;
11963 case PROCESSOR_PENTIUMPRO:
11965 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn occupies decoder 0 by itself: dump the pending
   packet, record a packet holding just INSN, dump it, then clear.  */
11967 if (uops == PPRO_UOPS_MANY)
11970 ix86_dump_ppro_packet (dump);
11971 ix86_sched_data.ppro.decode[0] = insn;
11972 ix86_sched_data.ppro.decode[1] = NULL;
11973 ix86_sched_data.ppro.decode[2] = NULL;
11975 ix86_dump_ppro_packet (dump);
11976 ix86_sched_data.ppro.decode[0] = NULL;
/* A few-uop insn must lead a fresh packet in decoder slot 0.  */
11978 else if (uops == PPRO_UOPS_FEW)
11981 ix86_dump_ppro_packet (dump);
11982 ix86_sched_data.ppro.decode[0] = insn;
11983 ix86_sched_data.ppro.decode[1] = NULL;
11984 ix86_sched_data.ppro.decode[2] = NULL;
/* Otherwise (single-uop) fill the first free decoder slot; once all
   three slots are occupied the packet is dumped and cleared.  */
11988 for (i = 0; i < 3; ++i)
11989 if (ix86_sched_data.ppro.decode[i] == NULL)
11991 ix86_sched_data.ppro.decode[i] = insn;
11999 ix86_dump_ppro_packet (dump);
12000 ix86_sched_data.ppro.decode[0] = NULL;
12001 ix86_sched_data.ppro.decode[1] = NULL;
12002 ix86_sched_data.ppro.decode[2] = NULL;
/* Pre-decrement: one more insn of this cycle's budget is consumed.  */
12006 return --ix86_sched_data.ppro.issued_this_cycle;
/* Target hook: nonzero when the scheduler should use the DFA pipeline
   interface.  Only Pentium and Athlon/K8 are tested here.
   NOTE(review): elided listing -- return type, braces and the return
   statements are missing.  */
12011 ia32_use_dfa_pipeline_interface ()
12013 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12018 /* How many alternative schedules to try. This should be as wide as the
12019 scheduling freedom in the DFA, but no wider. Making this value too
12020 large results in extra work for the scheduler. */
12023 ia32_multipass_dfa_lookahead ()
/* NOTE(review): elided listing -- the lookahead values returned for
   each cpu are missing after the test below.  */
12025 if (ix86_cpu == PROCESSOR_PENTIUM)
12032 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12033 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
/* NOTE(review): elided listing -- the tail of the comment above, the
   return type and the local declarations are missing.  */
12037 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12039 rtx dstref, srcref, dstreg, srcreg;
/* Walk every insn in the chain and let the recursive worker rewrite
   the MEM attributes inside each pattern.  */
12043 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12045 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12049 /* Subroutine of above to actually do the updating by recursively walking
/* the rtx X.  Copies the memory attributes of DSTREF/SRCREF onto any
   MEM whose address is exactly DSTREG/SRCREG (pointer equality).
   NOTE(review): elided listing -- return type, local declarations and
   trailing argument lines are missing.  */
12053 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12055 rtx dstref, srcref, dstreg, srcreg;
12057 enum rtx_code code = GET_CODE (x);
12058 const char *format_ptr = GET_RTX_FORMAT (code);
12061 if (code == MEM && XEXP (x, 0) == dstreg)
12062 MEM_COPY_ATTRIBUTES (x, dstref);
12063 else if (code == MEM && XEXP (x, 0) == srcreg)
12064 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into every rtx ('e') and rtx-vector ('E') operand of X.  */
12066 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12068 if (*format_ptr == 'e')
12069 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12071 else if (*format_ptr == 'E')
12072 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12073 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12078 /* Compute the alignment given to a constant that is being placed in memory.
12079 EXP is the constant and ALIGN is the alignment that the object would
12081 The value of this function is used instead of that alignment to align
/* NOTE(review): elided listing -- parameter declarations, braces and
   the return statements (presumably the widened alignments) are
   missing between the numbered lines.  */
12085 ix86_constant_alignment (exp, align)
/* Widen alignment of floating constants: DFmode to 64 bits, and
   128-bit modes to 128 bits.  */
12089 if (TREE_CODE (exp) == REAL_CST)
12091 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12093 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants also get extra alignment.  */
12096 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12103 /* Compute the alignment for a static variable.
12104 TYPE is the data type, and ALIGN is the alignment that
12105 the object would ordinarily have. The value of this function is used
12106 instead of that alignment to align the object. */
/* NOTE(review): elided listing -- parameter declarations, braces and
   the return statements carrying the widened alignments are missing
   between the numbered lines below.  */
12109 ix86_data_alignment (type, align)
/* Large aggregates (>= 256 bits, or with a nonzero high word in their
   size) get at least 256-bit alignment.  */
12113 if (AGGREGATE_TYPE_P (type)
12114 && TYPE_SIZE (type)
12115 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12116 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12117 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12120 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12121 to 16byte boundary. */
12124 if (AGGREGATE_TYPE_P (type)
12125 && TYPE_SIZE (type)
12126 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12127 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12128 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind widening: arrays by element mode, complex by whole mode,
   records/unions by the mode of their first field, scalars by their
   own mode.  DFmode data is raised to 64-bit, 128-bit modes to
   128-bit alignment.  */
12132 if (TREE_CODE (type) == ARRAY_TYPE)
12134 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12136 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12139 else if (TREE_CODE (type) == COMPLEX_TYPE)
12142 if (TYPE_MODE (type) == DCmode && align < 64)
12144 if (TYPE_MODE (type) == XCmode && align < 128)
12147 else if ((TREE_CODE (type) == RECORD_TYPE
12148 || TREE_CODE (type) == UNION_TYPE
12149 || TREE_CODE (type) == QUAL_UNION_TYPE)
12150 && TYPE_FIELDS (type))
12152 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12154 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12157 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12158 || TREE_CODE (type) == INTEGER_TYPE)
12160 if (TYPE_MODE (type) == DFmode && align < 64)
12162 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12169 /* Compute the alignment for a local variable.
12170 TYPE is the data type, and ALIGN is the alignment that
12171 the object would ordinarily have. The value of this macro is used
12172 instead of that alignment to align the object. */
/* NOTE(review): mirrors ix86_data_alignment above but with a smaller
   16-byte aggregate threshold; elided listing -- parameter
   declarations, braces and return statements are missing.  */
12175 ix86_local_alignment (type, align)
12179 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12180 to 16byte boundary. */
12183 if (AGGREGATE_TYPE_P (type)
12184 && TYPE_SIZE (type)
12185 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12186 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12187 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same per-kind widening as for static data: DFmode to 64 bits,
   128-bit modes to 128 bits, keyed off the element/field/whole mode
   depending on the tree code.  */
12190 if (TREE_CODE (type) == ARRAY_TYPE)
12192 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12194 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12197 else if (TREE_CODE (type) == COMPLEX_TYPE)
12199 if (TYPE_MODE (type) == DCmode && align < 64)
12201 if (TYPE_MODE (type) == XCmode && align < 128)
12204 else if ((TREE_CODE (type) == RECORD_TYPE
12205 || TREE_CODE (type) == UNION_TYPE
12206 || TREE_CODE (type) == QUAL_UNION_TYPE)
12207 && TYPE_FIELDS (type))
12209 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12211 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12214 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12215 || TREE_CODE (type) == INTEGER_TYPE)
12218 if (TYPE_MODE (type) == DFmode && align < 64)
12220 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12226 /* Emit RTL insns to initialize the variable parts of a trampoline.
12227 FNADDR is an RTX for the address of the function's pure code.
12228 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): elided listing -- return type, braces, the
   TARGET_64BIT split, the OFFSET bookkeeping and the abort() on
   overflow are missing between the numbered lines.  */
12230 x86_initialize_trampoline (tramp, fnaddr, cxt)
12231 rtx tramp, fnaddr, cxt;
12235 /* Compute offset from the end of the jmp to the target function. */
12236 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12237 plus_constant (tramp, 10),
12238 NULL_RTX, 1, OPTAB_DIRECT);
/* 32-bit trampoline: 0xb9 = movl $CXT, %ecx; 0xe9 = jmp rel32.  */
12239 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12240 gen_int_mode (0xb9, QImode));
12241 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12242 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12243 gen_int_mode (0xe9, QImode));
12244 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12249 /* Try to load address using shorter movl instead of movabs.
12250 We may want to support movq for kernel mode, but kernel does not use
12251 trampolines at the moment. */
/* 0xbb41 is the little-endian encoding of REX.B + movl $imm32, %r11d
   (zero-extends into %r11).  */
12252 if (x86_64_zero_extended_value (fnaddr))
12254 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12255 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12256 gen_int_mode (0xbb41, HImode));
12257 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12258 gen_lowpart (SImode, fnaddr));
/* Otherwise, 0xbb49 = REX.W+B movabs $imm64, %r11.  */
12263 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12264 gen_int_mode (0xbb49, HImode));
12265 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12269 /* Load static chain using movabs to r10. */
12270 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12271 gen_int_mode (0xba49, HImode));
12272 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12275 /* Jump to r11 (0xff49 + 0xe3 encodes jmp *%r11). */
12276 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12277 gen_int_mode (0xff49, HImode));
12278 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12279 gen_int_mode (0xe3, QImode));
/* Sanity check: the emitted code must fit within TRAMPOLINE_SIZE.  */
12281 if (offset > TRAMPOLINE_SIZE)
12285 #ifdef TRANSFER_FROM_TRAMPOLINE
/* Some platforms must flush caches / mark the stack executable.  */
12286 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12287 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register builtin NAME with signature TYPE and code CODE, but only
   when the corresponding ISA bits (MASK) are enabled in target_flags.
   NOTE(review): elided listing -- the do { ... } while (0) wrapper
   lines of this macro are missing.  */
12291 #define def_builtin(MASK, NAME, TYPE, CODE) \
12293 if ((MASK) & target_flags) \
12294 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12295 NULL, NULL_TREE); \
/* One table row describing an ix86 builtin: the target_flags mask that
   enables it, the insn pattern used to expand it, its user-visible
   name (0 when the name is registered elsewhere), its IX86_BUILTIN_*
   code, the rtx comparison code for compare builtins, and a flag
   (used by compare expanders to request swapped operands).  */
12298 struct builtin_description
12300 const unsigned int mask;
12301 const enum insn_code icode;
12302 const char *const name;
12303 const enum ix86_builtins code;
12304 const enum rtx_code comparison;
12305 const unsigned int flag;
12308 /* Used for builtins that are enabled both by -msse and -msse2. */
12309 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
/* COMISS/UCOMISS (SSE) and COMISD/UCOMISD (SSE2) scalar compare
   builtins.  The comparison field holds the rtx code tested on the
   flags result; note the eq/neq entries deliberately use the
   unordered codes UNEQ/LTGT.
   NOTE(review): elided listing -- the opening brace and closing "};"
   are missing around the numbered entries.  */
12311 static const struct builtin_description bdesc_comi[] =
12313 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12314 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12315 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12316 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12317 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12318 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12319 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12320 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12321 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12322 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12323 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12324 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12325 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12326 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12327 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12328 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12329 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12330 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12331 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12332 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12333 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12334 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12335 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12336 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* Two-operand MMX/SSE/SSE2 builtins.  Each row: enabling target_flags
   mask, insn code used to expand, user-visible name (0 if registered
   elsewhere), IX86_BUILTIN_* code, comparison rtx code (compare
   builtins only), and a swap-operands flag.
   NOTE(review): elided listing -- the opening brace, section comments
   and the closing "};" are missing between the numbered entries.  */
12339 static const struct builtin_description bdesc_2arg[] =
12342 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12343 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12344 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12345 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12346 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12347 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12348 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12349 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12351 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12352 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12353 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12354 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12355 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12356 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12357 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12358 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12359 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12360 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12361 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12362 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12363 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12364 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12365 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12366 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12367 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12368 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12369 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12370 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12372 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12373 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12374 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12375 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12377 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12378 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12379 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12380 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12382 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12383 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12384 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12385 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12386 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12389 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12390 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12391 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12392 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12393 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12394 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12396 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12397 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12398 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12399 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12400 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12401 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12402 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12403 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12405 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12406 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12407 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12409 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12410 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12411 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12412 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12414 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12415 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12417 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12418 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12419 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12420 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12421 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12422 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12424 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12425 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12426 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12427 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12429 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12430 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12431 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12432 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12433 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12434 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12437 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12438 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12439 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12441 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12442 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12444 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12445 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12446 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12447 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12448 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12449 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12451 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12452 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12453 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12454 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12455 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12456 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12458 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12459 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12460 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12461 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12463 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12464 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12467 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12468 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12469 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12470 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12471 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12472 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12473 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12474 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12476 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12477 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12478 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12479 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12480 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12481 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12482 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12483 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12484 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12485 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12486 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12487 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12488 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12489 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12490 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12491 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12492 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12493 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12494 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12495 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12497 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12498 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12499 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12500 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12502 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12503 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12504 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12505 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12507 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12508 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12509 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12512 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12513 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12514 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
/* FIX(review): paddq128/psubq128 are quadword (V2DI) operations; the
   original table expanded them through the V4SI (paddd/psubd)
   patterns, which gives wrong results whenever a carry/borrow crosses
   bit 32 of a 64-bit lane.  Use the V2DI add/sub patterns.  */
12515 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12516 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12517 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12518 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12519 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* FIX(review): these eight are 128-bit SSE2 instructions; gating them
   on MASK_MMX wrongly registered (and tried to expand) them when only
   -mmmx was enabled.  Gate on MASK_SSE2 like their neighbors.  */
12521 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12522 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12523 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12524 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12525 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12526 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12527 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12528 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12530 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12531 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12532 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12533 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12535 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12536 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12537 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12538 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12540 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12541 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12543 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12544 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12545 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12546 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12547 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12548 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12550 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12551 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12552 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12553 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12555 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12556 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12557 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12558 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12559 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12560 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12561 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12562 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12564 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12565 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12566 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12568 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12569 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12571 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12572 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12573 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12574 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12575 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12576 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12578 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12579 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12580 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12581 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12582 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12583 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12585 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12586 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12587 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12588 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12590 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12592 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12593 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12594 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
/* One-operand MMX/SSE/SSE2 builtins: move-mask extraction, sqrt/rcp
   approximations and the packed/scalar conversion family.  Name field
   is 0 for builtins registered elsewhere under their user name.
   NOTE(review): elided listing -- the opening brace and closing "};"
   are missing around the numbered entries.  */
12599 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12600 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12602 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12603 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12604 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12606 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12607 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12608 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12609 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12611 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12612 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12613 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12614 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12616 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12618 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12619 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12621 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12622 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12623 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12624 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12625 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12627 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12629 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12630 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12632 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12633 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12634 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12636 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
/* Target hook for builtin initialization; delegates to the MMX/SSE
   set-up routine below.
   NOTE(review): the return-type line and braces of this definition are
   elided in this listing -- restore from the original i386.c.  */
12640 ix86_init_builtins ()
12643   ix86_init_mmx_sse_builtins ();
/* NOTE(review): the listing below has randomly elided lines -- the
   function's storage-class/return-type line, opening brace, and the
   tail of the original head comment are missing.  */
12646 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12647    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
12650 ix86_init_mmx_sse_builtins ()
12652   const struct builtin_description * d;
/* Pointer types used by the load/store builtins registered below;
   the pc* variants point to const-qualified element types.  */
12655   tree pchar_type_node = build_pointer_type (char_type_node);
12656   tree pcchar_type_node = build_pointer_type (
12657 				build_type_variant (char_type_node, 1, 0));
12658   tree pfloat_type_node = build_pointer_type (float_type_node);
12659   tree pcfloat_type_node = build_pointer_type (
12660 				build_type_variant (float_type_node, 1, 0));
12661   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12662   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12663   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Function-type nodes for the MMX/SSE1 builtins.  Naming convention:
   <result>_ftype_<arg1>_<arg2>... .
   NOTE(review): some continuation lines (trailing "NULL_TREE);") and
   one declarator line ("tree di_ftype_void") are elided in this
   listing -- restore from the original i386.c.  */
12666   tree int_ftype_v4sf_v4sf
12667     = build_function_type_list (integer_type_node,
12668 				V4SF_type_node, V4SF_type_node, NULL_TREE);
12669   tree v4si_ftype_v4sf_v4sf
12670     = build_function_type_list (V4SI_type_node,
12671 				V4SF_type_node, V4SF_type_node, NULL_TREE);
12672   /* MMX/SSE/integer conversions.  */
12673   tree int_ftype_v4sf
12674     = build_function_type_list (integer_type_node,
12675 				V4SF_type_node, NULL_TREE);
12676   tree int_ftype_v8qi
12677     = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12678   tree v4sf_ftype_v4sf_int
12679     = build_function_type_list (V4SF_type_node,
12680 				V4SF_type_node, integer_type_node, NULL_TREE);
12681   tree v4sf_ftype_v4sf_v2si
12682     = build_function_type_list (V4SF_type_node,
12683 				V4SF_type_node, V2SI_type_node, NULL_TREE);
12684   tree int_ftype_v4hi_int
12685     = build_function_type_list (integer_type_node,
12686 				V4HI_type_node, integer_type_node, NULL_TREE);
12687   tree v4hi_ftype_v4hi_int_int
12688     = build_function_type_list (V4HI_type_node, V4HI_type_node,
12689 				integer_type_node, integer_type_node,
12691   /* Miscellaneous.  */
12692   tree v8qi_ftype_v4hi_v4hi
12693     = build_function_type_list (V8QI_type_node,
12694 				V4HI_type_node, V4HI_type_node, NULL_TREE);
12695   tree v4hi_ftype_v2si_v2si
12696     = build_function_type_list (V4HI_type_node,
12697 				V2SI_type_node, V2SI_type_node, NULL_TREE);
12698   tree v4sf_ftype_v4sf_v4sf_int
12699     = build_function_type_list (V4SF_type_node,
12700 				V4SF_type_node, V4SF_type_node,
12701 				integer_type_node, NULL_TREE);
12702   tree v2si_ftype_v4hi_v4hi
12703     = build_function_type_list (V2SI_type_node,
12704 				V4HI_type_node, V4HI_type_node, NULL_TREE);
12705   tree v4hi_ftype_v4hi_int
12706     = build_function_type_list (V4HI_type_node,
12707 				V4HI_type_node, integer_type_node, NULL_TREE);
12708   tree v4hi_ftype_v4hi_di
12709     = build_function_type_list (V4HI_type_node,
12710 				V4HI_type_node, long_long_unsigned_type_node,
12712   tree v2si_ftype_v2si_di
12713     = build_function_type_list (V2SI_type_node,
12714 				V2SI_type_node, long_long_unsigned_type_node,
12716   tree void_ftype_void
12717     = build_function_type (void_type_node, void_list_node);
12718   tree void_ftype_unsigned
12719     = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12720   tree unsigned_ftype_void
12721     = build_function_type (unsigned_type_node, void_list_node);
12723     = build_function_type (long_long_unsigned_type_node, void_list_node);
12724   tree v4sf_ftype_void
12725     = build_function_type (V4SF_type_node, void_list_node);
12726   tree v2si_ftype_v4sf
12727     = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12728   /* Loads/stores.  */
12729   tree void_ftype_v8qi_v8qi_pchar
12730     = build_function_type_list (void_type_node,
12731 				V8QI_type_node, V8QI_type_node,
12732 				pchar_type_node, NULL_TREE);
12733   tree v4sf_ftype_pcfloat
12734     = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12735   /* @@@ the type is bogus */
12736   tree v4sf_ftype_v4sf_pv2si
12737     = build_function_type_list (V4SF_type_node,
12738 				V4SF_type_node, pv2si_type_node, NULL_TREE);
12739   tree void_ftype_pv2si_v4sf
12740     = build_function_type_list (void_type_node,
12741 				pv2si_type_node, V4SF_type_node, NULL_TREE);
12742   tree void_ftype_pfloat_v4sf
12743     = build_function_type_list (void_type_node,
12744 				pfloat_type_node, V4SF_type_node, NULL_TREE);
/* NOTE(review): the trailing "NULL_TREE);" line of the next list call
   appears to be elided in this listing.  */
12745   tree void_ftype_pdi_di
12746     = build_function_type_list (void_type_node,
12747 				pdi_type_node, long_long_unsigned_type_node,
12749   tree void_ftype_pv2di_v2di
12750     = build_function_type_list (void_type_node,
12751 				pv2di_type_node, V2DI_type_node, NULL_TREE);
12752   /* Normal vector unops.  */
12753   tree v4sf_ftype_v4sf
12754     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12756   /* Normal vector binops.  */
12757   tree v4sf_ftype_v4sf_v4sf
12758     = build_function_type_list (V4SF_type_node,
12759 				V4SF_type_node, V4SF_type_node, NULL_TREE);
12760   tree v8qi_ftype_v8qi_v8qi
12761     = build_function_type_list (V8QI_type_node,
12762 				V8QI_type_node, V8QI_type_node, NULL_TREE);
12763   tree v4hi_ftype_v4hi_v4hi
12764     = build_function_type_list (V4HI_type_node,
12765 				V4HI_type_node, V4HI_type_node, NULL_TREE);
12766   tree v2si_ftype_v2si_v2si
12767     = build_function_type_list (V2SI_type_node,
12768 				V2SI_type_node, V2SI_type_node, NULL_TREE);
12769   tree di_ftype_di_di
12770     = build_function_type_list (long_long_unsigned_type_node,
12771 				long_long_unsigned_type_node,
12772 				long_long_unsigned_type_node, NULL_TREE);
/* Types for the 3DNow! builtins (V2SF vectors in MMX registers).  */
12774   tree v2si_ftype_v2sf
12775     = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12776   tree v2sf_ftype_v2si
12777     = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12778   tree v2si_ftype_v2si
12779     = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12780   tree v2sf_ftype_v2sf
12781     = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12782   tree v2sf_ftype_v2sf_v2sf
12783     = build_function_type_list (V2SF_type_node,
12784 				V2SF_type_node, V2SF_type_node, NULL_TREE);
12785   tree v2si_ftype_v2sf_v2sf
12786     = build_function_type_list (V2SI_type_node,
12787 				V2SF_type_node, V2SF_type_node, NULL_TREE);
/* Pointer and function types for the SSE2 builtins.
   NOTE(review): several declarator lines ("tree ti_ftype_void",
   "tree v2di_ftype_di", "tree di_ftype_v2di") and some trailing
   "NULL_TREE);" continuations are elided in this listing -- restore
   from the original i386.c.  */
12788   tree pint_type_node = build_pointer_type (integer_type_node);
12789   tree pcint_type_node = build_pointer_type (
12790 				build_type_variant (integer_type_node, 1, 0));
12791   tree pdouble_type_node = build_pointer_type (double_type_node);
12792   tree pcdouble_type_node = build_pointer_type (
12793 				build_type_variant (double_type_node, 1, 0));
12794   tree int_ftype_v2df_v2df
12795     = build_function_type_list (integer_type_node,
12796 				V2DF_type_node, V2DF_type_node, NULL_TREE);
12799     = build_function_type (intTI_type_node, void_list_node);
12800   tree v2di_ftype_void
12801     = build_function_type (V2DI_type_node, void_list_node);
12802   tree ti_ftype_ti_ti
12803     = build_function_type_list (intTI_type_node,
12804 				intTI_type_node, intTI_type_node, NULL_TREE);
12805   tree void_ftype_pcvoid
12806     = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12808     = build_function_type_list (V2DI_type_node,
12809 				long_long_unsigned_type_node, NULL_TREE);
12811     = build_function_type_list (long_long_unsigned_type_node,
12812 				V2DI_type_node, NULL_TREE);
12813   tree v4sf_ftype_v4si
12814     = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12815   tree v4si_ftype_v4sf
12816     = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12817   tree v2df_ftype_v4si
12818     = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12819   tree v4si_ftype_v2df
12820     = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12821   tree v2si_ftype_v2df
12822     = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12823   tree v4sf_ftype_v2df
12824     = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12825   tree v2df_ftype_v2si
12826     = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12827   tree v2df_ftype_v4sf
12828     = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12829   tree int_ftype_v2df
12830     = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12831   tree v2df_ftype_v2df_int
12832     = build_function_type_list (V2DF_type_node,
12833 				V2DF_type_node, integer_type_node, NULL_TREE);
12834   tree v4sf_ftype_v4sf_v2df
12835     = build_function_type_list (V4SF_type_node,
12836 				V4SF_type_node, V2DF_type_node, NULL_TREE);
12837   tree v2df_ftype_v2df_v4sf
12838     = build_function_type_list (V2DF_type_node,
12839 				V2DF_type_node, V4SF_type_node, NULL_TREE);
12840   tree v2df_ftype_v2df_v2df_int
12841     = build_function_type_list (V2DF_type_node,
12842 				V2DF_type_node, V2DF_type_node,
12845   tree v2df_ftype_v2df_pv2si
12846     = build_function_type_list (V2DF_type_node,
12847 				V2DF_type_node, pv2si_type_node, NULL_TREE);
12848   tree void_ftype_pv2si_v2df
12849     = build_function_type_list (void_type_node,
12850 				pv2si_type_node, V2DF_type_node, NULL_TREE);
12851   tree void_ftype_pdouble_v2df
12852     = build_function_type_list (void_type_node,
12853 				pdouble_type_node, V2DF_type_node, NULL_TREE);
12854   tree void_ftype_pint_int
12855     = build_function_type_list (void_type_node,
12856 				pint_type_node, integer_type_node, NULL_TREE);
12857   tree void_ftype_v16qi_v16qi_pchar
12858     = build_function_type_list (void_type_node,
12859 				V16QI_type_node, V16QI_type_node,
12860 				pchar_type_node, NULL_TREE);
12861   tree v2df_ftype_pcdouble
12862     = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12863   tree v2df_ftype_v2df_v2df
12864     = build_function_type_list (V2DF_type_node,
12865 				V2DF_type_node, V2DF_type_node, NULL_TREE);
/* Remaining SSE2 function-type nodes: 128-bit integer vector binops,
   shift/extract/insert types, and pointer-based load/store types.  */
12866   tree v16qi_ftype_v16qi_v16qi
12867     = build_function_type_list (V16QI_type_node,
12868 				V16QI_type_node, V16QI_type_node, NULL_TREE);
12869   tree v8hi_ftype_v8hi_v8hi
12870     = build_function_type_list (V8HI_type_node,
12871 				V8HI_type_node, V8HI_type_node, NULL_TREE);
12872   tree v4si_ftype_v4si_v4si
12873     = build_function_type_list (V4SI_type_node,
12874 				V4SI_type_node, V4SI_type_node, NULL_TREE);
12875   tree v2di_ftype_v2di_v2di
12876     = build_function_type_list (V2DI_type_node,
12877 				V2DI_type_node, V2DI_type_node, NULL_TREE);
12878   tree v2di_ftype_v2df_v2df
12879     = build_function_type_list (V2DI_type_node,
12880 				V2DF_type_node, V2DF_type_node, NULL_TREE);
12881   tree v2df_ftype_v2df
12882     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12883   tree v2df_ftype_double
12884     = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12885   tree v2df_ftype_double_double
12886     = build_function_type_list (V2DF_type_node,
12887 				double_type_node, double_type_node, NULL_TREE);
12888   tree int_ftype_v8hi_int
12889     = build_function_type_list (integer_type_node,
12890 				V8HI_type_node, integer_type_node, NULL_TREE);
12891   tree v8hi_ftype_v8hi_int_int
12892     = build_function_type_list (V8HI_type_node,
12893 				V8HI_type_node, integer_type_node,
12894 				integer_type_node, NULL_TREE);
12895   tree v2di_ftype_v2di_int
12896     = build_function_type_list (V2DI_type_node,
12897 				V2DI_type_node, integer_type_node, NULL_TREE);
12898   tree v4si_ftype_v4si_int
12899     = build_function_type_list (V4SI_type_node,
12900 				V4SI_type_node, integer_type_node, NULL_TREE);
12901   tree v8hi_ftype_v8hi_int
12902     = build_function_type_list (V8HI_type_node,
12903 				V8HI_type_node, integer_type_node, NULL_TREE);
12904   tree v8hi_ftype_v8hi_v2di
12905     = build_function_type_list (V8HI_type_node,
12906 				V8HI_type_node, V2DI_type_node, NULL_TREE);
12907   tree v4si_ftype_v4si_v2di
12908     = build_function_type_list (V4SI_type_node,
12909 				V4SI_type_node, V2DI_type_node, NULL_TREE);
12910   tree v4si_ftype_v8hi_v8hi
12911     = build_function_type_list (V4SI_type_node,
12912 				V8HI_type_node, V8HI_type_node, NULL_TREE);
12913   tree di_ftype_v8qi_v8qi
12914     = build_function_type_list (long_long_unsigned_type_node,
12915 				V8QI_type_node, V8QI_type_node, NULL_TREE);
12916   tree v2di_ftype_v16qi_v16qi
12917     = build_function_type_list (V2DI_type_node,
12918 				V16QI_type_node, V16QI_type_node, NULL_TREE);
12919   tree int_ftype_v16qi
12920     = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12921   tree v16qi_ftype_pcchar
12922     = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12923   tree void_ftype_pchar_v16qi
12924     = build_function_type_list (void_type_node,
12925 			        pchar_type_node, V16QI_type_node, NULL_TREE);
12926   tree v4si_ftype_pcint
12927     = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12928   tree void_ftype_pcint_v4si
12929     = build_function_type_list (void_type_node,
12930 			        pcint_type_node, V4SI_type_node, NULL_TREE);
12931   tree v2di_ftype_v2di
12932     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
/* Registration loop for the two-operand builtin table: picks the
   function type from the insn's operand-1 mode, then registers it.
   NOTE(review): this listing elides the loop braces, the "tree type;"
   declaration, the "switch (mode)" line, and all of the "case
   <MODE>mode:"/"break;" lines between the type assignments -- the
   bare assignments below are the surviving case bodies.  Restore
   the missing lines from the original i386.c.  */
12934   /* Add all builtins that are more or less simple operations on two
12936   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12938       /* Use one of the operands; the target can have a different mode for
12939 	 mask-generating compares.  */
12940       enum machine_mode mode;
12945       mode = insn_data[d->icode].operand[1].mode;
12950 	  type = v16qi_ftype_v16qi_v16qi;
12953 	  type = v8hi_ftype_v8hi_v8hi;
12956 	  type = v4si_ftype_v4si_v4si;
12959 	  type = v2di_ftype_v2di_v2di;
12962 	  type = v2df_ftype_v2df_v2df;
12965 	  type = ti_ftype_ti_ti;
12968 	  type = v4sf_ftype_v4sf_v4sf;
12971 	  type = v8qi_ftype_v8qi_v8qi;
12974 	  type = v4hi_ftype_v4hi_v4hi;
12977 	  type = v2si_ftype_v2si_v2si;
12980 	  type = di_ftype_di_di;
/* Comparisons return a mask vector of integer type rather than the
   operand (float) type, so override the type chosen above.  */
12987       /* Override for comparisons.  */
12988       if (d->icode == CODE_FOR_maskcmpv4sf3
12989 	  || d->icode == CODE_FOR_maskncmpv4sf3
12990 	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
12991 	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12992 	type = v4si_ftype_v4sf_v4sf;
12994       if (d->icode == CODE_FOR_maskcmpv2df3
12995 	  || d->icode == CODE_FOR_maskncmpv2df3
12996 	  || d->icode == CODE_FOR_vmmaskcmpv2df3
12997 	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
12998 	type = v2di_ftype_v2df_v2df;
13000       def_builtin (d->mask, d->name, type, d->code);
13003   /* Add the remaining MMX insns with somewhat more complicated types.  */
13004   def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13005   def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13006   def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13007   def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13008   def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13010   def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13011   def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13012   def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13014   def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13015   def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13017   def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13018   def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
/* comi/ucomi comparisons: SSE2 entries take v2df operands, the rest
   take v4sf; both return int.  */
13020   /* comi/ucomi insns.  */
13021   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13022     if (d->mask == MASK_SSE2)
13023       def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13025       def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13027   def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13028   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13029   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
/* SSE1 control-register access, conversions, extract/insert,
   loads/stores, and approximation instructions.  */
13031   def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13032   def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13033   def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13034   def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13035   def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13036   def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13037   def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13038   def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13040   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13041   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13043   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13045   def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13046   def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13047   def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13048   def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13049   def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13050   def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13052   def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13053   def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13054   def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13055   def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13057   def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13058   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13059   def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13060   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13062   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13064   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13066   def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13067   def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13068   def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13069   def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13070   def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13071   def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13073   def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13075   /* Original 3DNow!  */
13076   def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13077   def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13078   def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13079   def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13080   def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13081   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13082   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13083   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13084   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13085   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13086   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13087   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13088   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13089   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13090   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13091   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13092   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13093   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13094   def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13095   def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13097   /* 3DNow! extension as used in the Athlon CPU.  */
13098   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13099   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13100   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13101   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13102   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13103   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13105   def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
/* SSE2 builtins: extract/insert, masked moves, loads/stores,
   conversions, shuffles, shifts, and multiply-add.
   NOTE(review): the closing brace of ix86_init_mmx_sse_builtins is
   elided after this section in this listing.  */
13108   def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13109   def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13111   def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13112   def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13113   def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13115   def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13116   def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13117   def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13118   def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13119   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13120   def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13122   def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13123   def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13124   def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13125   def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13127   def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13128   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13129   def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13130   def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13131   def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13133   def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13134   def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13135   def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13136   def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13138   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13139   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13141   def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13143   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13144   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13146   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13147   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13148   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13149   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13150   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13152   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13154   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13155   def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13157   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13158   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13159   def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13161   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13162   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13163   def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13165   def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13166   def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13167   def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13168   def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13169   def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13170   def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13171   def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13173   def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13174   def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13175   def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13177   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13178   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13179   def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13180   def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13181   def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13182   def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13183   def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13185   def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13187   def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13188   def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13189   def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13191   def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13192   def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13193   def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13195   def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13196   def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13198   def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13199   def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13200   def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13201   def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13203   def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13204   def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13205   def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13206   def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13208   def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13209   def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13211   def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
/* NOTE(review): this listing elides this function's storage-class/
   return-type line, the "rtx x;" parameter declaration, the early
   "return x;", the "else" before the SSE branch, the final
   "return x;", and the braces -- restore from the original i386.c.
   Visible logic: if expand_expr produced const0_rtx where a vector
   was expected, replace it with a fresh register cleared via the
   MMX clear (DImode-class modes) or SSE clear (V4SF-class modes),
   using a SUBREG when the register's mode differs.  */
13214 /* Errors in the source file can cause expand_expr to return const0_rtx
13215    where we expect a vector.  To avoid crashing, use one of the vector
13216    clear instructions.  */
13218 safe_vector_operand (x, mode)
13220      enum machine_mode mode;
13222   if (x != const0_rtx)
13224   x = gen_reg_rtx (mode);
13226   if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13227     emit_insn (gen_mmx_clrdi (mode == DImode ? x
13228 			      : gen_rtx_SUBREG (DImode, x, 0)));
13230     emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13231 				: gen_rtx_SUBREG (V4SFmode, x, 0),
13232 				CONST0_RTX (V4SFmode)));
13236 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin: emit insn ICODE with the two arguments
   from ARGLIST, producing the result in TARGET (or a new pseudo when
   TARGET is unsuitable).
   NOTE(review): elided listing -- return type, `tree arglist; rtx target;`
   declarations, braces, the `if (! target` head of the condition at
   13259, the `abort ();` body after 13265, and the final
   emit_insn/return tail are not visible here.  */
13239 ix86_expand_binop_builtin (icode, arglist, target)
13240 enum insn_code icode;
13245 tree arg0 = TREE_VALUE (arglist);
13246 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13247 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13248 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13249 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13250 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13251 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
13253 if (VECTOR_MODE_P (mode0))
13254 op0 = safe_vector_operand (op0, mode0);
13255 if (VECTOR_MODE_P (mode1))
13256 op1 = safe_vector_operand (op1, mode1);
13259 || GET_MODE (target) != tmode
13260 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13261 target = gen_reg_rtx (tmode);
13263 /* In case the insn wants input operands in modes different from
13264 the result, abort. */
13265 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force operands into registers when the insn's predicates reject
   them as-is.  */
13268 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13269 op0 = copy_to_mode_reg (mode0, op0)
13270 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13271 op1 = copy_to_mode_reg (mode1, op1);
13273 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13274 yet one of the two must not be a memory. This is normally enforced
13275 by expanders, but we didn't bother to create one here. */
13276 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13277 op0 = copy_to_mode_reg (mode0, op0);
13279 pat = GEN_FCN (icode) (target, op0, op1);
13286 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin: arg0 is the destination address, arg1 the
   value to store; emit insn ICODE with a MEM built over arg0.
   NOTE(review): elided listing -- return type, parameter declarations,
   braces, `rtx pat;`, and the emit_insn/return tail are missing.  */
13289 ix86_expand_store_builtin (icode, arglist)
13290 enum insn_code icode;
13294 tree arg0 = TREE_VALUE (arglist);
13295 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13296 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13297 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13298 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13299 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13301 if (VECTOR_MODE_P (mode1))
13302 op1 = safe_vector_operand (op1, mode1);
/* Destination is always a MEM addressed by a register copy of arg0.  */
13304 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13306 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13307 op1 = copy_to_mode_reg (mode1, op1);
13309 pat = GEN_FCN (icode) (op0, op1);
13315 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-operand builtin.  When DO_LOAD is nonzero the single
   argument is treated as an address and wrapped in a MEM (line 13335);
   otherwise it is used directly.
   NOTE(review): elided listing -- return type, declarations for
   arglist/target/do_load, braces, the `if (do_load)`/`else` structure
   around 13335, and the emit_insn/return tail are missing from view.  */
13318 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13319 enum insn_code icode;
13325 tree arg0 = TREE_VALUE (arglist);
13326 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13327 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13328 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13331 || GET_MODE (target) != tmode
13332 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13333 target = gen_reg_rtx (tmode);
/* Load path: dereference the pointer argument.  */
13335 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13338 if (VECTOR_MODE_P (mode0))
13339 op0 = safe_vector_operand (op0, mode0);
13341 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13342 op0 = copy_to_mode_reg (mode0, op0);
13345 pat = GEN_FCN (icode) (target, op0);
13352 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13353 sqrtss, rsqrtss, rcpss. */
/* These scalar SSE insns take the same value as both input operands
   (the upper elements pass through), hence op1 mirrors op0.
   NOTE(review): elided listing -- return type, declarations, braces,
   the assignment `op1 = op0;` expected between 13376 and 13379, and the
   emit_insn/return tail are missing from view; confirm in full source.  */
13356 ix86_expand_unop1_builtin (icode, arglist, target)
13357 enum insn_code icode;
13362 tree arg0 = TREE_VALUE (arglist);
13363 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13364 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13365 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13368 || GET_MODE (target) != tmode
13369 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13370 target = gen_reg_rtx (tmode);
13372 if (VECTOR_MODE_P (mode0))
13373 op0 = safe_vector_operand (op0, mode0);
13375 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13376 op0 = copy_to_mode_reg (mode0, op0);
13379 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13380 op1 = copy_to_mode_reg (mode0, op1);
13382 pat = GEN_FCN (icode) (target, op0, op1);
13389 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D: build the comparison
   rtx (D->comparison) over the two arguments and emit D->icode with it
   as the third source operand.
   NOTE(review): elided listing -- return type, declarations, braces,
   the condition and operand-swap body around 13414-13422 (only the tmp
   copy at 13417-13418 is visible), the `if (! target` head at 13424,
   and the emit_insn/return tail are missing from view.  */
13392 ix86_expand_sse_compare (d, arglist, target)
13393 const struct builtin_description *d;
13398 tree arg0 = TREE_VALUE (arglist);
13399 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13400 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13401 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13403 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13404 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13405 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13406 enum rtx_code comparison = d->comparison;
13408 if (VECTOR_MODE_P (mode0))
13409 op0 = safe_vector_operand (op0, mode0);
13410 if (VECTOR_MODE_P (mode1))
13411 op1 = safe_vector_operand (op1, mode1);
13413 /* Swap operands if we have a comparison that isn't available in
13417 rtx tmp = gen_reg_rtx (mode1);
13418 emit_move_insn (tmp, op1);
13424 || GET_MODE (target) != tmode
13425 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13426 target = gen_reg_rtx (tmode);
13428 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13429 op0 = copy_to_mode_reg (mode0, op0);
13430 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13431 op1 = copy_to_mode_reg (mode1, op1);
/* op2 carries the comparison code as an rtx for the mask-compare
   pattern.  */
13433 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13434 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13441 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a [u]comiss/[u]comisd builtin: emit the compare (which sets
   the flags), then materialize the D->comparison predicate into the
   low byte of an SImode pseudo via STRICT_LOW_PART, returning the
   SImode register.
   NOTE(review): elided listing -- return type, declarations, braces,
   the operand-swap body after 13464, `if (! pat) return 0;
   emit_insn (pat);` before 13487, and the SET_DEST/const0_rtx operands
   of the comparison at 13489 are missing from view.  */
13444 ix86_expand_sse_comi (d, arglist, target)
13445 const struct builtin_description *d;
13450 tree arg0 = TREE_VALUE (arglist);
13451 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13452 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13453 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13455 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13456 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13457 enum rtx_code comparison = d->comparison;
13459 if (VECTOR_MODE_P (mode0))
13460 op0 = safe_vector_operand (op0, mode0);
13461 if (VECTOR_MODE_P (mode1))
13462 op1 = safe_vector_operand (op1, mode1);
13464 /* Swap operands if we have a comparison that isn't available in
/* Result register: zero the full SImode word, then write only the low
   byte (QImode SUBREG) with the flag value.  */
13473 target = gen_reg_rtx (SImode);
13474 emit_move_insn (target, const0_rtx);
13475 target = gen_rtx_SUBREG (QImode, target, 0);
13477 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13478 op0 = copy_to_mode_reg (mode0, op0);
13479 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13480 op1 = copy_to_mode_reg (mode1, op1);
13482 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13483 pat = GEN_FCN (d->icode) (op0, op1);
13487 emit_insn (gen_rtx_SET (VOIDmode,
13488 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13489 gen_rtx_fmt_ee (comparison, QImode,
/* Return the enclosing SImode pseudo, not the QImode subreg.  */
13493 return SUBREG_REG (target);
13496 /* Expand an expression EXP that calls a built-in function,
13497 with result going to TARGET if that's convenient
13498 (and in mode MODE if that's convenient).
13499 SUBTARGET may be used as the target for computing one of EXP's operands.
13500 IGNORE is nonzero if the value is to be ignored. */
/* Main dispatcher for i386 builtins (TARGET_EXPAND_BUILTIN).  Special
   cases are handled inline in the switch; everything else falls
   through to the bdesc_2arg / bdesc_1arg / bdesc_comi tables.
   NOTE(review): elided listing -- the return type, `switch (fcode)`
   head, braces, many `break;`/`return` statements, `if (! pat)`
   checks, and the final `return const0_rtx;` are missing from view
   throughout this function.  */
13503 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13506 rtx subtarget ATTRIBUTE_UNUSED;
13507 enum machine_mode mode ATTRIBUTE_UNUSED;
13508 int ignore ATTRIBUTE_UNUSED;
13510 const struct builtin_description *d;
13512 enum insn_code icode;
13513 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13514 tree arglist = TREE_OPERAND (exp, 1);
13515 tree arg0, arg1, arg2;
13516 rtx op0, op1, op2, pat;
13517 enum machine_mode tmode, mode0, mode1, mode2;
13518 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13522 case IX86_BUILTIN_EMMS:
13523 emit_insn (gen_emms ());
13526 case IX86_BUILTIN_SFENCE:
13527 emit_insn (gen_sfence ());
/* pextrw: second argument (the lane selector) must be an immediate.  */
13530 case IX86_BUILTIN_PEXTRW:
13531 case IX86_BUILTIN_PEXTRW128:
13532 icode = (fcode == IX86_BUILTIN_PEXTRW
13533 ? CODE_FOR_mmx_pextrw
13534 : CODE_FOR_sse2_pextrw);
13535 arg0 = TREE_VALUE (arglist);
13536 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13537 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13538 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13539 tmode = insn_data[icode].operand[0].mode;
13540 mode0 = insn_data[icode].operand[1].mode;
13541 mode1 = insn_data[icode].operand[2].mode;
13543 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13544 op0 = copy_to_mode_reg (mode0, op0);
13545 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13547 /* @@@ better error message */
13548 error ("selector must be an immediate");
13549 return gen_reg_rtx (tmode);
13552 || GET_MODE (target) != tmode
13553 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13554 target = gen_reg_rtx (tmode);
13555 pat = GEN_FCN (icode) (target, op0, op1);
13561 case IX86_BUILTIN_PINSRW:
13562 case IX86_BUILTIN_PINSRW128:
13563 icode = (fcode == IX86_BUILTIN_PINSRW
13564 ? CODE_FOR_mmx_pinsrw
13565 : CODE_FOR_sse2_pinsrw);
13566 arg0 = TREE_VALUE (arglist);
13567 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13568 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13569 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13570 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13571 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13572 tmode = insn_data[icode].operand[0].mode;
13573 mode0 = insn_data[icode].operand[1].mode;
13574 mode1 = insn_data[icode].operand[2].mode;
13575 mode2 = insn_data[icode].operand[3].mode;
13577 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13578 op0 = copy_to_mode_reg (mode0, op0);
13579 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13580 op1 = copy_to_mode_reg (mode1, op1);
13581 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13583 /* @@@ better error message */
13584 error ("selector must be an immediate");
13588 || GET_MODE (target) != tmode
13589 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13590 target = gen_reg_rtx (tmode);
13591 pat = GEN_FCN (icode) (target, op0, op1, op2);
13597 case IX86_BUILTIN_MASKMOVQ:
13598 case IX86_BUILTIN_MASKMOVDQU:
13599 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13600 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13601 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13602 : CODE_FOR_sse2_maskmovdqu));
13603 /* Note the arg order is different from the operand order. */
13604 arg1 = TREE_VALUE (arglist);
13605 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13606 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13607 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13608 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13609 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13610 mode0 = insn_data[icode].operand[0].mode;
13611 mode1 = insn_data[icode].operand[1].mode;
13612 mode2 = insn_data[icode].operand[2].mode;
13614 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13615 op0 = copy_to_mode_reg (mode0, op0);
13616 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13617 op1 = copy_to_mode_reg (mode1, op1);
13618 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13619 op2 = copy_to_mode_reg (mode2, op2);
13620 pat = GEN_FCN (icode) (op0, op1, op2);
13626 case IX86_BUILTIN_SQRTSS:
13627 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13628 case IX86_BUILTIN_RSQRTSS:
13629 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13630 case IX86_BUILTIN_RCPSS:
13631 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13633 case IX86_BUILTIN_LOADAPS:
13634 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13636 case IX86_BUILTIN_LOADUPS:
13637 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13639 case IX86_BUILTIN_STOREAPS:
13640 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13642 case IX86_BUILTIN_STOREUPS:
13643 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13645 case IX86_BUILTIN_LOADSS:
13646 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13648 case IX86_BUILTIN_STORESS:
13649 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
/* movhps/movlps/movhpd/movlpd loads: operand 1 is a vector register,
   operand 2 a memory; only the high/low half is replaced.  */
13651 case IX86_BUILTIN_LOADHPS:
13652 case IX86_BUILTIN_LOADLPS:
13653 case IX86_BUILTIN_LOADHPD:
13654 case IX86_BUILTIN_LOADLPD:
13655 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13656 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13657 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13658 : CODE_FOR_sse2_movlpd);
13659 arg0 = TREE_VALUE (arglist);
13660 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13661 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13662 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13663 tmode = insn_data[icode].operand[0].mode;
13664 mode0 = insn_data[icode].operand[1].mode;
13665 mode1 = insn_data[icode].operand[2].mode;
13667 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13668 op0 = copy_to_mode_reg (mode0, op0);
13669 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13671 || GET_MODE (target) != tmode
13672 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13673 target = gen_reg_rtx (tmode);
13674 pat = GEN_FCN (icode) (target, op0, op1);
13680 case IX86_BUILTIN_STOREHPS:
13681 case IX86_BUILTIN_STORELPS:
13682 case IX86_BUILTIN_STOREHPD:
13683 case IX86_BUILTIN_STORELPD:
13684 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13685 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13686 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13687 : CODE_FOR_sse2_movlpd);
13688 arg0 = TREE_VALUE (arglist);
13689 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13690 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13691 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13692 mode0 = insn_data[icode].operand[1].mode;
13693 mode1 = insn_data[icode].operand[2].mode;
13695 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13696 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13697 op1 = copy_to_mode_reg (mode1, op1);
/* op0 doubles as destination and pass-through source operand.  */
13699 pat = GEN_FCN (icode) (op0, op0, op1);
13705 case IX86_BUILTIN_MOVNTPS:
13706 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13707 case IX86_BUILTIN_MOVNTQ:
13708 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
/* ldmxcsr/stmxcsr go through a stack slot since the insns take a MEM.  */
13710 case IX86_BUILTIN_LDMXCSR:
13711 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13712 target = assign_386_stack_local (SImode, 0);
13713 emit_move_insn (target, op0);
13714 emit_insn (gen_ldmxcsr (target));
13717 case IX86_BUILTIN_STMXCSR:
13718 target = assign_386_stack_local (SImode, 0);
13719 emit_insn (gen_stmxcsr (target));
13720 return copy_to_mode_reg (SImode, target);
13722 case IX86_BUILTIN_SHUFPS:
13723 case IX86_BUILTIN_SHUFPD:
13724 icode = (fcode == IX86_BUILTIN_SHUFPS
13725 ? CODE_FOR_sse_shufps
13726 : CODE_FOR_sse2_shufpd);
13727 arg0 = TREE_VALUE (arglist);
13728 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13729 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13730 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13731 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13732 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13733 tmode = insn_data[icode].operand[0].mode;
13734 mode0 = insn_data[icode].operand[1].mode;
13735 mode1 = insn_data[icode].operand[2].mode;
13736 mode2 = insn_data[icode].operand[3].mode;
13738 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13739 op0 = copy_to_mode_reg (mode0, op0);
13740 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13741 op1 = copy_to_mode_reg (mode1, op1);
13742 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13744 /* @@@ better error message */
13745 error ("mask must be an immediate");
13746 return gen_reg_rtx (tmode);
13749 || GET_MODE (target) != tmode
13750 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13751 target = gen_reg_rtx (tmode);
13752 pat = GEN_FCN (icode) (target, op0, op1, op2);
13758 case IX86_BUILTIN_PSHUFW:
13759 case IX86_BUILTIN_PSHUFD:
13760 case IX86_BUILTIN_PSHUFHW:
13761 case IX86_BUILTIN_PSHUFLW:
13762 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13763 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13764 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13765 : CODE_FOR_mmx_pshufw);
13766 arg0 = TREE_VALUE (arglist);
13767 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13768 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13769 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13770 tmode = insn_data[icode].operand[0].mode;
13771 mode1 = insn_data[icode].operand[1].mode;
13772 mode2 = insn_data[icode].operand[2].mode;
13774 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13775 op0 = copy_to_mode_reg (mode1, op0);
13776 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13778 /* @@@ better error message */
13779 error ("mask must be an immediate");
13783 || GET_MODE (target) != tmode
13784 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13785 target = gen_reg_rtx (tmode);
13786 pat = GEN_FCN (icode) (target, op0, op1);
/* Whole-register byte shifts: the patterns operate on TImode, so the
   V2DI operands are accessed through subregs.  */
13792 case IX86_BUILTIN_PSLLDQI128:
13793 case IX86_BUILTIN_PSRLDQI128:
13794 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13795 : CODE_FOR_sse2_lshrti3);
13796 arg0 = TREE_VALUE (arglist);
13797 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13798 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13799 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13800 tmode = insn_data[icode].operand[0].mode;
13801 mode1 = insn_data[icode].operand[1].mode;
13802 mode2 = insn_data[icode].operand[2].mode;
13804 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13806 op0 = copy_to_reg (op0);
13807 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13809 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13811 error ("shift must be an immediate");
13814 target = gen_reg_rtx (V2DImode);
13815 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13821 case IX86_BUILTIN_FEMMS:
13822 emit_insn (gen_femms ());
/* 3DNow! builtins, all table-style delegations.  */
13825 case IX86_BUILTIN_PAVGUSB:
13826 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13828 case IX86_BUILTIN_PF2ID:
13829 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13831 case IX86_BUILTIN_PFACC:
13832 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13834 case IX86_BUILTIN_PFADD:
13835 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13837 case IX86_BUILTIN_PFCMPEQ:
13838 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13840 case IX86_BUILTIN_PFCMPGE:
13841 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13843 case IX86_BUILTIN_PFCMPGT:
13844 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13846 case IX86_BUILTIN_PFMAX:
13847 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13849 case IX86_BUILTIN_PFMIN:
13850 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13852 case IX86_BUILTIN_PFMUL:
13853 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13855 case IX86_BUILTIN_PFRCP:
13856 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13858 case IX86_BUILTIN_PFRCPIT1:
13859 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13861 case IX86_BUILTIN_PFRCPIT2:
13862 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13864 case IX86_BUILTIN_PFRSQIT1:
13865 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13867 case IX86_BUILTIN_PFRSQRT:
13868 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13870 case IX86_BUILTIN_PFSUB:
13871 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13873 case IX86_BUILTIN_PFSUBR:
13874 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13876 case IX86_BUILTIN_PI2FD:
13877 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13879 case IX86_BUILTIN_PMULHRW:
13880 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13882 case IX86_BUILTIN_PF2IW:
13883 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13885 case IX86_BUILTIN_PFNACC:
13886 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13888 case IX86_BUILTIN_PFPNACC:
13889 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13891 case IX86_BUILTIN_PI2FW:
13892 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13894 case IX86_BUILTIN_PSWAPDSI:
13895 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13897 case IX86_BUILTIN_PSWAPDSF:
13898 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13900 case IX86_BUILTIN_SSE_ZERO:
13901 target = gen_reg_rtx (V4SFmode);
13902 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13905 case IX86_BUILTIN_MMX_ZERO:
13906 target = gen_reg_rtx (DImode);
13907 emit_insn (gen_mmx_clrdi (target));
13910 case IX86_BUILTIN_CLRTI:
13911 target = gen_reg_rtx (V2DImode);
13912 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13916 case IX86_BUILTIN_SQRTSD:
13917 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13918 case IX86_BUILTIN_LOADAPD:
13919 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13920 case IX86_BUILTIN_LOADUPD:
13921 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13923 case IX86_BUILTIN_STOREAPD:
13924 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13925 case IX86_BUILTIN_STOREUPD:
13926 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13928 case IX86_BUILTIN_LOADSD:
13929 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13931 case IX86_BUILTIN_STORESD:
13932 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
/* setpd1: broadcast one double into both lanes via a stack slot plus
   shufpd with selector 0.  */
13934 case IX86_BUILTIN_SETPD1:
13935 target = assign_386_stack_local (DFmode, 0);
13936 arg0 = TREE_VALUE (arglist);
13937 emit_move_insn (adjust_address (target, DFmode, 0),
13938 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13939 op0 = gen_reg_rtx (V2DFmode);
13940 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13941 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13944 case IX86_BUILTIN_SETPD:
13945 target = assign_386_stack_local (V2DFmode, 0);
13946 arg0 = TREE_VALUE (arglist);
13947 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13948 emit_move_insn (adjust_address (target, DFmode, 0),
13949 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13950 emit_move_insn (adjust_address (target, DFmode, 8),
13951 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13952 op0 = gen_reg_rtx (V2DFmode);
13953 emit_insn (gen_sse2_movapd (op0, target));
13956 case IX86_BUILTIN_LOADRPD:
13957 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13958 gen_reg_rtx (V2DFmode), 1);
13959 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13962 case IX86_BUILTIN_LOADPD1:
13963 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13964 gen_reg_rtx (V2DFmode), 1);
13965 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13968 case IX86_BUILTIN_STOREPD1:
13969 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13970 case IX86_BUILTIN_STORERPD:
13971 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13973 case IX86_BUILTIN_CLRPD:
13974 target = gen_reg_rtx (V2DFmode);
13975 emit_insn (gen_sse_clrv2df (target));
13978 case IX86_BUILTIN_MFENCE:
13979 emit_insn (gen_sse2_mfence ());
13981 case IX86_BUILTIN_LFENCE:
13982 emit_insn (gen_sse2_lfence ());
13985 case IX86_BUILTIN_CLFLUSH:
13986 arg0 = TREE_VALUE (arglist);
13987 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13988 icode = CODE_FOR_sse2_clflush;
13989 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13990 op0 = copy_to_mode_reg (Pmode, op0);
13992 emit_insn (gen_sse2_clflush (op0));
13995 case IX86_BUILTIN_MOVNTPD:
13996 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13997 case IX86_BUILTIN_MOVNTDQ:
13998 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13999 case IX86_BUILTIN_MOVNTI:
14000 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14002 case IX86_BUILTIN_LOADDQA:
14003 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14004 case IX86_BUILTIN_LOADDQU:
14005 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14006 case IX86_BUILTIN_LOADD:
14007 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14009 case IX86_BUILTIN_STOREDQA:
14010 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14011 case IX86_BUILTIN_STOREDQU:
14012 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14013 case IX86_BUILTIN_STORED:
14014 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
/* Fallback: search the generic builtin-description tables; compares
   need the dedicated SSE-compare expansion path.  */
14020 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14021 if (d->code == fcode)
14023 /* Compares are treated specially. */
14024 if (d->icode == CODE_FOR_maskcmpv4sf3
14025 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14026 || d->icode == CODE_FOR_maskncmpv4sf3
14027 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14028 || d->icode == CODE_FOR_maskcmpv2df3
14029 || d->icode == CODE_FOR_vmmaskcmpv2df3
14030 || d->icode == CODE_FOR_maskncmpv2df3
14031 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14032 return ix86_expand_sse_compare (d, arglist, target);
14034 return ix86_expand_binop_builtin (d->icode, arglist, target);
14037 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14038 if (d->code == fcode)
14039 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14041 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14042 if (d->code == fcode)
14043 return ix86_expand_sse_comi (d, arglist, target);
14045 /* @@@ Should really do something sensible here. */
14049 /* Store OPERAND to the memory after reload is completed. This means
14050 that we can't easily use assign_stack_local. */
/* Spill OPERAND (of MODE) to memory late in compilation: into the red
   zone on 64-bit targets that have one, otherwise by pushing onto the
   stack.  Returns a MEM referring to the stored value.
   NOTE(review): elided listing -- return type, `rtx operand;`,
   `rtx result;`, braces, `abort ();` after 14057, the switch over
   MODE, `emit_insn (...)` call heads, operand references inside the
   SETs, and the final `return result;` are missing from view.  */
14052 ix86_force_to_memory (mode, operand)
14053 enum machine_mode mode;
14057 if (!reload_completed)
/* Red zone: store below the stack pointer without adjusting it.  */
14059 if (TARGET_64BIT && TARGET_RED_ZONE)
14061 result = gen_rtx_MEM (mode,
14062 gen_rtx_PLUS (Pmode,
14064 GEN_INT (-RED_ZONE_SIZE)));
14065 emit_move_insn (result, operand);
14067 else if (TARGET_64BIT && !TARGET_RED_ZONE)
/* 64-bit without red zone: push a DImode word.  */
14073 operand = gen_lowpart (DImode, operand);
14077 gen_rtx_SET (VOIDmode,
14078 gen_rtx_MEM (DImode,
14079 gen_rtx_PRE_DEC (DImode,
14080 stack_pointer_rtx)),
14086 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: push as two SImode halves (high first).  */
14095 split_di (&operand, 1, operands, operands + 1);
14097 gen_rtx_SET (VOIDmode,
14098 gen_rtx_MEM (SImode,
14099 gen_rtx_PRE_DEC (Pmode,
14100 stack_pointer_rtx)),
14103 gen_rtx_SET (VOIDmode,
14104 gen_rtx_MEM (SImode,
14105 gen_rtx_PRE_DEC (Pmode,
14106 stack_pointer_rtx)),
14111 /* It is better to store HImodes as SImodes. */
14112 if (!TARGET_PARTIAL_REG_STALL)
14113 operand = gen_lowpart (SImode, operand);
14117 gen_rtx_SET (VOIDmode,
14118 gen_rtx_MEM (GET_MODE (operand),
14119 gen_rtx_PRE_DEC (SImode,
14120 stack_pointer_rtx)),
14126 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14131 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the bytes it pushed (no-op in the
   red-zone case, where the stack pointer was never adjusted).
   NOTE(review): elided listing -- return type, braces, the `int size`
   declaration and its assignments (visible only as bare conditions at
   14140/14142), and the GEN_INT (size) operand of the PLUS are missing
   from view.  */
14133 ix86_free_from_memory (mode)
14134 enum machine_mode mode;
14136 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Size selection: 8 bytes for DImode/64-bit, 2 for HImode with
   partial-reg stalls, else 4 -- presumably; confirm in full source.  */
14140 if (mode == DImode || TARGET_64BIT)
14142 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14146 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14147 to pop or add instruction if registers are available. */
14148 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14149 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14154 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14155 QImode must go into class Q_REGS.
14156 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14157 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS for x86.
   NOTE(review): elided listing -- return type, `rtx x;`, braces, and
   the class returned by most branches (NO_REGS / Q_REGS / etc.) are
   missing from view, so the value of each early return must be
   confirmed against the full source.  */
14159 ix86_preferred_reload_class (x, class)
14161 enum reg_class class;
/* Nonzero vector constants cannot be loaded directly into any class.  */
14163 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14165 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14167 /* SSE can't load any constant directly yet. */
14168 if (SSE_CLASS_P (class))
14170 /* Floats can load 0 and 1. */
14171 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14173 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14174 if (MAYBE_SSE_CLASS_P (class))
14175 return (reg_class_subset_p (class, GENERAL_REGS)
14176 ? GENERAL_REGS : FLOAT_REGS);
14180 /* General regs can load everything. */
14181 if (reg_class_subset_p (class, GENERAL_REGS))
14182 return GENERAL_REGS;
14183 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14184 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14187 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14189 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14194 /* If we are copying between general and FP registers, we need a memory
14195 location. The same is true for SSE and MMX registers.
14197 The macro can't work reliably when one of the CLASSES is class containing
14198 registers from multiple units (SSE, MMX, integer). We avoid this by never
14199 combining those units in single alternative in the machine description.
14200 Ensure that this constraint holds to avoid unexpected surprises.
14202 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14203 enforce these sanity checks. */
/* Implements SECONDARY_MEMORY_NEEDED.  Returns nonzero when moving
   MODE between CLASS1 and CLASS2 must go through memory.
   NOTE(review): elided listing -- return type, `int strict;`, braces,
   and the abort/return body of the sanity check (between 14215 and the
   return at 14222) are missing from view.  */
14205 ix86_secondary_memory_needed (class1, class2, mode, strict)
14206 enum reg_class class1, class2;
14207 enum machine_mode mode;
/* Sanity check: mixed-unit classes are not supported (see comment
   above); enforced only when STRICT -- body elided here.  */
14210 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14211 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14212 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14213 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14214 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14215 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed for FP<->non-FP always, and for SSE/MMX<->other
   except in SImode (where movd can move directly).  */
14222 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14223 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14224 && (mode) != SImode)
14225 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14226 && (mode) != SImode));
14228 /* Return the cost of moving data from a register in class CLASS1 to
14229 one in class CLASS2.
14231 It is not required that the cost always equal 2 when FROM is the same as TO;
14232 on some machines it is expensive to move between registers if they are not
14233 general registers. */
/* Implements REGISTER_MOVE_COST using the active ix86_cost table.
   NOTE(review): elided listing -- return type, braces, the `int cost`
   declaration/initialization, the `return cost;` of the secondary-
   memory branch, the cost returned for the FP/MMX overlap case, and
   the final integer-register return are missing from view.  */
14235 ix86_register_move_cost (mode, class1, class2)
14236 enum machine_mode mode;
14237 enum reg_class class1, class2;
14239 /* In case we require secondary memory, compute cost of the store followed
14240 by load. In order to avoid bad register allocation choices, we need
14241 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14243 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14247 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14248 MEMORY_MOVE_COST (mode, class1, 1));
14249 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14250 MEMORY_MOVE_COST (mode, class2, 1));
14252 /* In case of copying from general_purpose_register we may emit multiple
14253 stores followed by single load causing memory size mismatch stall.
14254 Count this as arbitrarily high cost of 20. */
14255 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14258 /* In the case of FP/MMX moves, the registers actually overlap, and we
14259 have to switch modes in order to treat them differently. */
14260 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14261 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14267 /* Moves between SSE/MMX and integer unit are expensive. */
14268 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14269 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14270 return ix86_cost->mmxsse_to_integer;
14271 if (MAYBE_FLOAT_CLASS_P (class1))
14272 return ix86_cost->fp_move;
14273 if (MAYBE_SSE_CLASS_P (class1))
14274 return ix86_cost->sse_move;
14275 if (MAYBE_MMX_CLASS_P (class1))
14276 return ix86_cost->mmx_move;
14280 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14282 ix86_hard_regno_mode_ok (regno, mode)
14284 enum machine_mode mode;
14286 /* Flags and only flags can only hold CCmode values. */
14287 if (CC_REGNO_P (regno))
14288 return GET_MODE_CLASS (mode) == MODE_CC;
14289 if (GET_MODE_CLASS (mode) == MODE_CC
14290 || GET_MODE_CLASS (mode) == MODE_RANDOM
14291 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14293 if (FP_REGNO_P (regno))
14294 return VALID_FP_MODE_P (mode);
14295 if (SSE_REGNO_P (regno))
14296 return VALID_SSE_REG_MODE (mode);
14297 if (MMX_REGNO_P (regno))
14298 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14299 /* We handle both integer and floats in the general purpose registers.
14300 In future we should be able to handle vector modes as well. */
14301 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14303 /* Take care for QImode values - they can be in non-QI regs, but then
14304 they do cause partial register stalls. */
14305 if (regno < 4 || mode != QImode || TARGET_64BIT)
14307 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14310 /* Return the cost of moving data of mode M between a
14311 register and memory. A value of 2 is the default; this cost is
14312 relative to those in `REGISTER_MOVE_COST'.
14314 If moving between registers and memory is more expensive than
14315 between two registers, you should define this macro to express the
14318 Model also increased moving costs of QImode registers in non
14322 ix86_memory_move_cost (mode, class, in)
14323 enum machine_mode mode;
14324 enum reg_class class;
14327 if (FLOAT_CLASS_P (class))
14345 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14347 if (SSE_CLASS_P (class))
14350 switch (GET_MODE_SIZE (mode))
14364 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14366 if (MMX_CLASS_P (class))
14369 switch (GET_MODE_SIZE (mode))
14380 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14382 switch (GET_MODE_SIZE (mode))
14386 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14387 : ix86_cost->movzbl_load);
14389 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14390 : ix86_cost->int_store[0] + 4);
14393 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14395 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14396 if (mode == TFmode)
14398 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14399 * ((int) GET_MODE_SIZE (mode)
14400 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit an SVR3-style constructor record: push the constructor's
   address in the init section so crt code can call it at startup.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
#ifdef TARGET_MACHO
/* Counter used to generate unique local labels for lazy-binding stubs.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC stub: materialize our own address, then jump through the
	 lazy pointer relative to it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* The lazy pointer initially resolves to the binder, which patches
     it to the real symbol address on first use.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14482 /* Order the registers for register allocator. */
14485 x86_order_regs_for_local_alloc ()
14490 /* First allocate the local general purpose registers. */
14491 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14492 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14493 reg_alloc_order [pos++] = i;
14495 /* Global general purpose registers. */
14496 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14497 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14498 reg_alloc_order [pos++] = i;
14500 /* x87 registers come first in case we are doing FP math
14502 if (!TARGET_SSE_MATH)
14503 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14504 reg_alloc_order [pos++] = i;
14506 /* SSE registers. */
14507 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14508 reg_alloc_order [pos++] = i;
14509 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14510 reg_alloc_order [pos++] = i;
14512 /* x87 registers. */
14513 if (TARGET_SSE_MATH)
14514 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14515 reg_alloc_order [pos++] = i;
14517 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14518 reg_alloc_order [pos++] = i;
14520 /* Initialize the rest of array as we do not allocate some registers
14522 while (pos < FIRST_PSEUDO_REGISTER)
14523 reg_alloc_order [pos++] = 0;
14526 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14527 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14531 ix86_ms_bitfield_layout_p (record_type)
14532 tree record_type ATTRIBUTE_UNUSED;
14534 return TARGET_USE_MS_BITFIELD_LAYOUT;
14537 /* Returns an expression indicating where the this parameter is
14538 located on entry to the FUNCTION. */
14541 x86_this_parameter (function)
14544 tree type = TREE_TYPE (function);
14548 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14549 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14552 if (ix86_fntype_regparm (type) > 0)
14556 parm = TYPE_ARG_TYPES (type);
14557 /* Figure out whether or not the function has a variable number of
14559 for (; parm; parm = TREE_CHAIN (parm))
14560 if (TREE_VALUE (parm) == void_type_node)
14562 /* If not, the this parameter is in %eax. */
14564 return gen_rtx_REG (SImode, 0);
14567 if (aggregate_value_p (TREE_TYPE (type)))
14568 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14570 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14573 /* Determine whether x86_output_mi_thunk can succeed. */
14576 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14577 tree thunk ATTRIBUTE_UNUSED;
14578 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14579 HOST_WIDE_INT vcall_offset;
14582 /* 64-bit can handle anything. */
14586 /* For 32-bit, everything's fine if we have one free register. */
14587 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14590 /* Need a free register for vcall_offset. */
14594 /* Need a free register for GOT references. */
14595 if (flag_pic && !(*targetm.binds_local_p) (function))
14598 /* Otherwise ok. */
14602 /* Output the assembler code for a thunk function. THUNK_DECL is the
14603 declaration for the thunk function itself, FUNCTION is the decl for
14604 the target function. DELTA is an immediate constant offset to be
14605 added to THIS. If VCALL_OFFSET is nonzero, the word at
14606 *(*this + vcall_offset) should be added to THIS. */
14609 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14610 FILE *file ATTRIBUTE_UNUSED;
14611 tree thunk ATTRIBUTE_UNUSED;
14612 HOST_WIDE_INT delta;
14613 HOST_WIDE_INT vcall_offset;
14617 rtx this = x86_this_parameter (function);
14620 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14621 pull it in now and let DELTA benefit. */
14624 else if (vcall_offset)
14626 /* Put the this parameter into %eax. */
14628 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14629 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14632 this_reg = NULL_RTX;
14634 /* Adjust the this parameter by a fixed constant. */
14637 xops[0] = GEN_INT (delta);
14638 xops[1] = this_reg ? this_reg : this;
14641 if (!x86_64_general_operand (xops[0], DImode))
14643 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14645 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14649 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14652 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14655 /* Adjust the this parameter by a value stored in the vtable. */
14659 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14661 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14663 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14666 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14668 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14670 /* Adjust the this parameter. */
14671 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14672 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14674 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14675 xops[0] = GEN_INT (vcall_offset);
14677 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14678 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14680 xops[1] = this_reg;
14682 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14684 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14687 /* If necessary, drop THIS back to its stack slot. */
14688 if (this_reg && this_reg != this)
14690 xops[0] = this_reg;
14692 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14695 xops[0] = DECL_RTL (function);
14698 if (!flag_pic || (*targetm.binds_local_p) (function))
14699 output_asm_insn ("jmp\t%P0", xops);
14702 tmp = XEXP (xops[0], 0);
14703 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14704 tmp = gen_rtx_CONST (Pmode, tmp);
14705 tmp = gen_rtx_MEM (QImode, tmp);
14707 output_asm_insn ("jmp\t%A0", xops);
14712 if (!flag_pic || (*targetm.binds_local_p) (function))
14713 output_asm_insn ("jmp\t%P0", xops);
14716 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14717 output_set_got (tmp);
14720 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14721 output_asm_insn ("jmp\t{*}%1", xops);
14727 x86_field_alignment (field, computed)
14731 enum machine_mode mode;
14732 tree type = TREE_TYPE (field);
14734 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14736 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14737 ? get_inner_array_type (type) : type);
14738 if (mode == DFmode || mode == DCmode
14739 || GET_MODE_CLASS (mode) == MODE_INT
14740 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14741 return MIN (32, computed);
14745 /* Output assembler code to FILE to increment profiler label # LABELNO
14746 for profiling a function entry. */
14748 x86_function_profiler (file, labelno)
14750 int labelno ATTRIBUTE_UNUSED;
14755 #ifndef NO_PROFILE_COUNTERS
14756 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14758 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14762 #ifndef NO_PROFILE_COUNTERS
14763 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14765 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14769 #ifndef NO_PROFILE_COUNTERS
14770 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14771 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14773 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14777 #ifndef NO_PROFILE_COUNTERS
14778 fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno,
14779 PROFILE_COUNT_REGISTER);
14781 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14785 /* Implement machine specific optimizations.
14786 At the moment we implement single transformation: AMD Athlon works faster
14787 when RET is not destination of conditional jump or directly preceded
14788 by other jump instruction. We avoid the penalty by inserting NOP just
14789 before the RET instructions in such cases. */
14791 x86_machine_dependent_reorg (first)
14792 rtx first ATTRIBUTE_UNUSED;
14796 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
14798 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14800 basic_block bb = e->src;
14803 bool insert = false;
14805 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14807 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14808 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14810 if (prev && GET_CODE (prev) == CODE_LABEL)
14813 for (e = bb->pred; e; e = e->pred_next)
14814 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14815 && !(e->flags & EDGE_FALLTHRU))
14820 prev = prev_active_insn (ret);
14821 if (prev && GET_CODE (prev) == JUMP_INSN
14822 && any_condjump_p (prev))
14824 /* Empty functions get branch misspredict even when the jump destination
14825 is not visible to us. */
14826 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14830 emit_insn_before (gen_nop (), ret);
14834 /* Return nonzero when QImode register that must be represented via REX prefix
14837 x86_extended_QIreg_mentioned_p (insn)
14841 extract_insn_cached (insn);
14842 for (i = 0; i < recog_data.n_operands; i++)
14843 if (REG_P (recog_data.operand[i])
14844 && REGNO (recog_data.operand[i]) >= 4)
14849 /* Return nonzero when P points to register encoded via REX prefix.
14850 Called via for_each_rtx. */
14852 extended_reg_mentioned_1 (p, data)
14854 void *data ATTRIBUTE_UNUSED;
14856 unsigned int regno;
14859 regno = REGNO (*p);
14860 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14863 /* Return true when INSN mentions register that must be encoded using REX
14866 x86_extended_reg_mentioned_p (insn)
14869 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14872 #include "gt-i386.h"