1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Processor costs (relative to an add) */
55 struct processor_costs size_cost = { /* costs for tunning for size */
56 2, /* cost of an add instruction */
57 3, /* cost of a lea instruction */
58 2, /* variable shift costs */
59 3, /* constant shift costs */
60 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
61 0, /* cost of multiply per each bit set */
62 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
63 3, /* cost of movsx */
64 3, /* cost of movzx */
67 2, /* cost for loading QImode using movzbl */
68 {2, 2, 2}, /* cost of loading integer registers
69 in QImode, HImode and SImode.
70 Relative to reg-reg move (2). */
71 {2, 2, 2}, /* cost of storing integer registers */
72 2, /* cost of reg,reg fld/fst */
73 {2, 2, 2}, /* cost of loading fp registers
74 in SFmode, DFmode and XFmode */
75 {2, 2, 2}, /* cost of loading integer registers */
76 3, /* cost of moving MMX register */
77 {3, 3}, /* cost of loading MMX registers
78 in SImode and DImode */
79 {3, 3}, /* cost of storing MMX registers
80 in SImode and DImode */
81 3, /* cost of moving SSE register */
82 {3, 3, 3}, /* cost of loading SSE registers
83 in SImode, DImode and TImode */
84 {3, 3, 3}, /* cost of storing SSE registers
85 in SImode, DImode and TImode */
86 3, /* MMX or SSE register to integer */
87 0, /* size of prefetch block */
88 0, /* number of parallel prefetches */
90 2, /* cost of FADD and FSUB insns. */
91 2, /* cost of FMUL instruction. */
92 2, /* cost of FDIV instruction. */
93 2, /* cost of FABS instruction. */
94 2, /* cost of FCHS instruction. */
95 2, /* cost of FSQRT instruction. */
98 /* Processor costs (relative to an add) */
100 struct processor_costs i386_cost = { /* 386 specific costs */
101 1, /* cost of an add instruction */
102 1, /* cost of a lea instruction */
103 3, /* variable shift costs */
104 2, /* constant shift costs */
105 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
106 1, /* cost of multiply per each bit set */
107 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
108 3, /* cost of movsx */
109 2, /* cost of movzx */
110 15, /* "large" insn */
112 4, /* cost for loading QImode using movzbl */
113 {2, 4, 2}, /* cost of loading integer registers
114 in QImode, HImode and SImode.
115 Relative to reg-reg move (2). */
116 {2, 4, 2}, /* cost of storing integer registers */
117 2, /* cost of reg,reg fld/fst */
118 {8, 8, 8}, /* cost of loading fp registers
119 in SFmode, DFmode and XFmode */
120 {8, 8, 8}, /* cost of loading integer registers */
121 2, /* cost of moving MMX register */
122 {4, 8}, /* cost of loading MMX registers
123 in SImode and DImode */
124 {4, 8}, /* cost of storing MMX registers
125 in SImode and DImode */
126 2, /* cost of moving SSE register */
127 {4, 8, 16}, /* cost of loading SSE registers
128 in SImode, DImode and TImode */
129 {4, 8, 16}, /* cost of storing SSE registers
130 in SImode, DImode and TImode */
131 3, /* MMX or SSE register to integer */
132 0, /* size of prefetch block */
133 0, /* number of parallel prefetches */
135 23, /* cost of FADD and FSUB insns. */
136 27, /* cost of FMUL instruction. */
137 88, /* cost of FDIV instruction. */
138 22, /* cost of FABS instruction. */
139 24, /* cost of FCHS instruction. */
140 122, /* cost of FSQRT instruction. */
144 struct processor_costs i486_cost = { /* 486 specific costs */
145 1, /* cost of an add instruction */
146 1, /* cost of a lea instruction */
147 3, /* variable shift costs */
148 2, /* constant shift costs */
149 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
150 1, /* cost of multiply per each bit set */
151 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
152 3, /* cost of movsx */
153 2, /* cost of movzx */
154 15, /* "large" insn */
156 4, /* cost for loading QImode using movzbl */
157 {2, 4, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 4, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {8, 8, 8}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {8, 8, 8}, /* cost of loading integer registers */
165 2, /* cost of moving MMX register */
166 {4, 8}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {4, 8}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {4, 8, 16}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {4, 8, 16}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3, /* MMX or SSE register to integer */
176 0, /* size of prefetch block */
177 0, /* number of parallel prefetches */
179 8, /* cost of FADD and FSUB insns. */
180 16, /* cost of FMUL instruction. */
181 73, /* cost of FDIV instruction. */
182 3, /* cost of FABS instruction. */
183 3, /* cost of FCHS instruction. */
184 83, /* cost of FSQRT instruction. */
188 struct processor_costs pentium_cost = {
189 1, /* cost of an add instruction */
190 1, /* cost of a lea instruction */
191 4, /* variable shift costs */
192 1, /* constant shift costs */
193 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
194 0, /* cost of multiply per each bit set */
195 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
196 3, /* cost of movsx */
197 2, /* cost of movzx */
198 8, /* "large" insn */
200 6, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {2, 2, 6}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {4, 4, 6}, /* cost of loading integer registers */
209 8, /* cost of moving MMX register */
210 {8, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {8, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
223 3, /* cost of FADD and FSUB insns. */
224 3, /* cost of FMUL instruction. */
225 39, /* cost of FDIV instruction. */
226 1, /* cost of FABS instruction. */
227 1, /* cost of FCHS instruction. */
228 70, /* cost of FSQRT instruction. */
232 struct processor_costs pentiumpro_cost = {
233 1, /* cost of an add instruction */
234 1, /* cost of a lea instruction */
235 1, /* variable shift costs */
236 1, /* constant shift costs */
237 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
238 0, /* cost of multiply per each bit set */
239 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
240 1, /* cost of movsx */
241 1, /* cost of movzx */
242 8, /* "large" insn */
244 2, /* cost for loading QImode using movzbl */
245 {4, 4, 4}, /* cost of loading integer registers
246 in QImode, HImode and SImode.
247 Relative to reg-reg move (2). */
248 {2, 2, 2}, /* cost of storing integer registers */
249 2, /* cost of reg,reg fld/fst */
250 {2, 2, 6}, /* cost of loading fp registers
251 in SFmode, DFmode and XFmode */
252 {4, 4, 6}, /* cost of loading integer registers */
253 2, /* cost of moving MMX register */
254 {2, 2}, /* cost of loading MMX registers
255 in SImode and DImode */
256 {2, 2}, /* cost of storing MMX registers
257 in SImode and DImode */
258 2, /* cost of moving SSE register */
259 {2, 2, 8}, /* cost of loading SSE registers
260 in SImode, DImode and TImode */
261 {2, 2, 8}, /* cost of storing SSE registers
262 in SImode, DImode and TImode */
263 3, /* MMX or SSE register to integer */
264 32, /* size of prefetch block */
265 6, /* number of parallel prefetches */
267 3, /* cost of FADD and FSUB insns. */
268 5, /* cost of FMUL instruction. */
269 56, /* cost of FDIV instruction. */
270 2, /* cost of FABS instruction. */
271 2, /* cost of FCHS instruction. */
272 56, /* cost of FSQRT instruction. */
276 struct processor_costs k6_cost = {
277 1, /* cost of an add instruction */
278 2, /* cost of a lea instruction */
279 1, /* variable shift costs */
280 1, /* constant shift costs */
281 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
282 0, /* cost of multiply per each bit set */
283 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
284 2, /* cost of movsx */
285 2, /* cost of movzx */
286 8, /* "large" insn */
288 3, /* cost for loading QImode using movzbl */
289 {4, 5, 4}, /* cost of loading integer registers
290 in QImode, HImode and SImode.
291 Relative to reg-reg move (2). */
292 {2, 3, 2}, /* cost of storing integer registers */
293 4, /* cost of reg,reg fld/fst */
294 {6, 6, 6}, /* cost of loading fp registers
295 in SFmode, DFmode and XFmode */
296 {4, 4, 4}, /* cost of loading integer registers */
297 2, /* cost of moving MMX register */
298 {2, 2}, /* cost of loading MMX registers
299 in SImode and DImode */
300 {2, 2}, /* cost of storing MMX registers
301 in SImode and DImode */
302 2, /* cost of moving SSE register */
303 {2, 2, 8}, /* cost of loading SSE registers
304 in SImode, DImode and TImode */
305 {2, 2, 8}, /* cost of storing SSE registers
306 in SImode, DImode and TImode */
307 6, /* MMX or SSE register to integer */
308 32, /* size of prefetch block */
309 1, /* number of parallel prefetches */
311 2, /* cost of FADD and FSUB insns. */
312 2, /* cost of FMUL instruction. */
313 56, /* cost of FDIV instruction. */
314 2, /* cost of FABS instruction. */
315 2, /* cost of FCHS instruction. */
316 56, /* cost of FSQRT instruction. */
320 struct processor_costs athlon_cost = {
321 1, /* cost of an add instruction */
322 2, /* cost of a lea instruction */
323 1, /* variable shift costs */
324 1, /* constant shift costs */
325 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
326 0, /* cost of multiply per each bit set */
327 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
328 1, /* cost of movsx */
329 1, /* cost of movzx */
330 8, /* "large" insn */
332 4, /* cost for loading QImode using movzbl */
333 {3, 4, 3}, /* cost of loading integer registers
334 in QImode, HImode and SImode.
335 Relative to reg-reg move (2). */
336 {3, 4, 3}, /* cost of storing integer registers */
337 4, /* cost of reg,reg fld/fst */
338 {4, 4, 12}, /* cost of loading fp registers
339 in SFmode, DFmode and XFmode */
340 {6, 6, 8}, /* cost of loading integer registers */
341 2, /* cost of moving MMX register */
342 {4, 4}, /* cost of loading MMX registers
343 in SImode and DImode */
344 {4, 4}, /* cost of storing MMX registers
345 in SImode and DImode */
346 2, /* cost of moving SSE register */
347 {4, 4, 6}, /* cost of loading SSE registers
348 in SImode, DImode and TImode */
349 {4, 4, 5}, /* cost of storing SSE registers
350 in SImode, DImode and TImode */
351 5, /* MMX or SSE register to integer */
352 64, /* size of prefetch block */
353 6, /* number of parallel prefetches */
355 4, /* cost of FADD and FSUB insns. */
356 4, /* cost of FMUL instruction. */
357 24, /* cost of FDIV instruction. */
358 2, /* cost of FABS instruction. */
359 2, /* cost of FCHS instruction. */
360 35, /* cost of FSQRT instruction. */
364 struct processor_costs k8_cost = {
365 1, /* cost of an add instruction */
366 2, /* cost of a lea instruction */
367 1, /* variable shift costs */
368 1, /* constant shift costs */
369 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
370 0, /* cost of multiply per each bit set */
371 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
372 1, /* cost of movsx */
373 1, /* cost of movzx */
374 8, /* "large" insn */
376 4, /* cost for loading QImode using movzbl */
377 {3, 4, 3}, /* cost of loading integer registers
378 in QImode, HImode and SImode.
379 Relative to reg-reg move (2). */
380 {3, 4, 3}, /* cost of storing integer registers */
381 4, /* cost of reg,reg fld/fst */
382 {4, 4, 12}, /* cost of loading fp registers
383 in SFmode, DFmode and XFmode */
384 {6, 6, 8}, /* cost of loading integer registers */
385 2, /* cost of moving MMX register */
386 {3, 3}, /* cost of loading MMX registers
387 in SImode and DImode */
388 {4, 4}, /* cost of storing MMX registers
389 in SImode and DImode */
390 2, /* cost of moving SSE register */
391 {4, 3, 6}, /* cost of loading SSE registers
392 in SImode, DImode and TImode */
393 {4, 4, 5}, /* cost of storing SSE registers
394 in SImode, DImode and TImode */
395 5, /* MMX or SSE register to integer */
396 64, /* size of prefetch block */
397 6, /* number of parallel prefetches */
399 4, /* cost of FADD and FSUB insns. */
400 4, /* cost of FMUL instruction. */
401 19, /* cost of FDIV instruction. */
402 2, /* cost of FABS instruction. */
403 2, /* cost of FCHS instruction. */
404 35, /* cost of FSQRT instruction. */
408 struct processor_costs pentium4_cost = {
409 1, /* cost of an add instruction */
410 1, /* cost of a lea instruction */
411 4, /* variable shift costs */
412 4, /* constant shift costs */
413 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
414 0, /* cost of multiply per each bit set */
415 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
416 1, /* cost of movsx */
417 1, /* cost of movzx */
418 16, /* "large" insn */
420 2, /* cost for loading QImode using movzbl */
421 {4, 5, 4}, /* cost of loading integer registers
422 in QImode, HImode and SImode.
423 Relative to reg-reg move (2). */
424 {2, 3, 2}, /* cost of storing integer registers */
425 2, /* cost of reg,reg fld/fst */
426 {2, 2, 6}, /* cost of loading fp registers
427 in SFmode, DFmode and XFmode */
428 {4, 4, 6}, /* cost of loading integer registers */
429 2, /* cost of moving MMX register */
430 {2, 2}, /* cost of loading MMX registers
431 in SImode and DImode */
432 {2, 2}, /* cost of storing MMX registers
433 in SImode and DImode */
434 12, /* cost of moving SSE register */
435 {12, 12, 12}, /* cost of loading SSE registers
436 in SImode, DImode and TImode */
437 {2, 2, 8}, /* cost of storing SSE registers
438 in SImode, DImode and TImode */
439 10, /* MMX or SSE register to integer */
440 64, /* size of prefetch block */
441 6, /* number of parallel prefetches */
443 5, /* cost of FADD and FSUB insns. */
444 7, /* cost of FMUL instruction. */
445 43, /* cost of FDIV instruction. */
446 2, /* cost of FABS instruction. */
447 2, /* cost of FCHS instruction. */
448 43, /* cost of FSQRT instruction. */
451 const struct processor_costs *ix86_cost = &pentium_cost;
453 /* Processor feature/optimization bitmasks. */
454 #define m_386 (1<<PROCESSOR_I386)
455 #define m_486 (1<<PROCESSOR_I486)
456 #define m_PENT (1<<PROCESSOR_PENTIUM)
457 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
458 #define m_K6 (1<<PROCESSOR_K6)
459 #define m_ATHLON (1<<PROCESSOR_ATHLON)
460 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
461 #define m_K8 (1<<PROCESSOR_K8)
462 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
464 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
465 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
466 const int x86_zero_extend_with_and = m_486 | m_PENT;
467 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
468 const int x86_double_with_add = ~m_386;
469 const int x86_use_bit_test = m_386;
470 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
471 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
472 const int x86_3dnow_a = m_ATHLON_K8;
473 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_branch_hints = m_PENT4;
475 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
476 const int x86_partial_reg_stall = m_PPRO;
477 const int x86_use_loop = m_K6;
478 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
479 const int x86_use_mov0 = m_K6;
480 const int x86_use_cltd = ~(m_PENT | m_K6);
481 const int x86_read_modify_write = ~m_PENT;
482 const int x86_read_modify = ~(m_PENT | m_PPRO);
483 const int x86_split_long_moves = m_PPRO;
484 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
485 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
486 const int x86_single_stringop = m_386 | m_PENT4;
487 const int x86_qimode_math = ~(0);
488 const int x86_promote_qi_regs = 0;
489 const int x86_himode_math = ~(m_PPRO);
490 const int x86_promote_hi_regs = m_PPRO;
491 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
492 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
493 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
494 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
495 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
496 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
497 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
498 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
499 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
500 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
501 const int x86_decompose_lea = m_PENT4;
502 const int x86_shift1 = ~m_486;
503 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
504 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
505 /* Set for machines where the type and dependencies are resolved on SSE register
506 parts insetad of whole registers, so we may maintain just lower part of
507 scalar values in proper format leaving the upper part undefined. */
508 const int x86_sse_partial_regs = m_ATHLON_K8;
509 /* Athlon optimizes partial-register FPS special case, thus avoiding the
510 need for extra instructions beforehand */
511 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
512 const int x86_sse_typeless_stores = m_ATHLON_K8;
513 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
514 const int x86_use_ffreep = m_ATHLON_K8;
515 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
517 /* In case the avreage insn count for single function invocation is
518 lower than this constant, emit fast (but longer) prologue and
520 #define FAST_PROLOGUE_INSN_COUNT 20
522 /* Set by prologue expander and used by epilogue expander to determine
524 static int use_fast_prologue_epilogue;
526 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
527 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
528 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
529 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
531 /* Array of the smallest class containing reg number REGNO, indexed by
532 REGNO. Used by REGNO_REG_CLASS in i386.h. */
534 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
537 AREG, DREG, CREG, BREG,
539 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
541 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
542 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
545 /* flags, fpsr, dirflag, frame */
546 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
547 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
549 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
551 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
552 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
553 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
557 /* The "default" register map used in 32bit mode. */
559 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
561 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
562 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
563 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
564 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
565 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
566 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
567 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
570 static int const x86_64_int_parameter_registers[6] =
572 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
573 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
576 static int const x86_64_int_return_registers[4] =
578 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
581 /* The "default" register map used in 64bit mode. */
582 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
584 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
585 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
586 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
587 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
588 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
589 8,9,10,11,12,13,14,15, /* extended integer registers */
590 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
593 /* Define the register numbers to be used in Dwarf debugging information.
594 The SVR4 reference port C compiler uses the following register numbers
595 in its Dwarf output code:
596 0 for %eax (gcc regno = 0)
597 1 for %ecx (gcc regno = 2)
598 2 for %edx (gcc regno = 1)
599 3 for %ebx (gcc regno = 3)
600 4 for %esp (gcc regno = 7)
601 5 for %ebp (gcc regno = 6)
602 6 for %esi (gcc regno = 4)
603 7 for %edi (gcc regno = 5)
604 The following three DWARF register numbers are never generated by
605 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
606 believes these numbers have these meanings.
607 8 for %eip (no gcc equivalent)
608 9 for %eflags (gcc regno = 17)
609 10 for %trapno (no gcc equivalent)
610 It is not at all clear how we should number the FP stack registers
611 for the x86 architecture. If the version of SDB on x86/svr4 were
612 a bit less brain dead with respect to floating-point then we would
613 have a precedent to follow with respect to DWARF register numbers
614 for x86 FP registers, but the SDB on x86/svr4 is so completely
615 broken with respect to FP registers that it is hardly worth thinking
616 of it as something to strive for compatibility with.
617 The version of x86/svr4 SDB I have at the moment does (partially)
618 seem to believe that DWARF register number 11 is associated with
619 the x86 register %st(0), but that's about all. Higher DWARF
620 register numbers don't seem to be associated with anything in
621 particular, and even for DWARF regno 11, SDB only seems to under-
622 stand that it should say that a variable lives in %st(0) (when
623 asked via an `=' command) if we said it was in DWARF regno 11,
624 but SDB still prints garbage when asked for the value of the
625 variable in question (via a `/' command).
626 (Also note that the labels SDB prints for various FP stack regs
627 when doing an `x' command are all wrong.)
628 Note that these problems generally don't affect the native SVR4
629 C compiler because it doesn't allow the use of -O with -g and
630 because when it is *not* optimizing, it allocates a memory
631 location for each floating-point variable, and the memory
632 location is what gets described in the DWARF AT_location
633 attribute for the variable in question.
634 Regardless of the severe mental illness of the x86/svr4 SDB, we
635 do something sensible here and we use the following DWARF
636 register numbers. Note that these are all stack-top-relative
638 11 for %st(0) (gcc regno = 8)
639 12 for %st(1) (gcc regno = 9)
640 13 for %st(2) (gcc regno = 10)
641 14 for %st(3) (gcc regno = 11)
642 15 for %st(4) (gcc regno = 12)
643 16 for %st(5) (gcc regno = 13)
644 17 for %st(6) (gcc regno = 14)
645 18 for %st(7) (gcc regno = 15)
647 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
649 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
650 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
651 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
652 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
653 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
654 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
655 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
658 /* Test and compare insns in i386.md store the information needed to
659 generate branch and scc insns here. */
661 rtx ix86_compare_op0 = NULL_RTX;
662 rtx ix86_compare_op1 = NULL_RTX;
664 /* The encoding characters for the four TLS models present in ELF. */
666 static char const tls_model_chars[] = " GLil";
668 #define MAX_386_STACK_LOCALS 3
669 /* Size of the register save area. */
670 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
672 /* Define the structure for the machine field in struct function. */
673 struct machine_function GTY(())
675 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
676 const char *some_ld_name;
677 int save_varrargs_registers;
678 int accesses_prev_frame;
681 #define ix86_stack_locals (cfun->machine->stack_locals)
682 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
684 /* Structure describing stack frame layout.
685 Stack grows downward:
691 saved frame pointer if frame_pointer_needed
692 <- HARD_FRAME_POINTER
698 > to_allocate <- FRAME_POINTER
710 int outgoing_arguments_size;
713 HOST_WIDE_INT to_allocate;
714 /* The offsets relative to ARG_POINTER. */
715 HOST_WIDE_INT frame_pointer_offset;
716 HOST_WIDE_INT hard_frame_pointer_offset;
717 HOST_WIDE_INT stack_pointer_offset;
720 /* Used to enable/disable debugging features. */
721 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
722 /* Code model option as passed by user. */
723 const char *ix86_cmodel_string;
725 enum cmodel ix86_cmodel;
727 const char *ix86_asm_string;
728 enum asm_dialect ix86_asm_dialect = ASM_ATT;
730 const char *ix86_tls_dialect_string;
731 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
733 /* Which unit we are generating floating point math for. */
734 enum fpmath_unit ix86_fpmath;
736 /* Which cpu are we scheduling for. */
737 enum processor_type ix86_cpu;
738 /* Which instruction set architecture to use. */
739 enum processor_type ix86_arch;
741 /* Strings to hold which cpu and instruction set architecture to use. */
742 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
743 const char *ix86_arch_string; /* for -march=<xxx> */
744 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
746 /* # of registers to use to pass arguments. */
747 const char *ix86_regparm_string;
749 /* true if sse prefetch instruction is not NOOP. */
750 int x86_prefetch_sse;
752 /* ix86_regparm_string as a number */
755 /* Alignment to use for loops and jumps: */
757 /* Power of two alignment for loops. */
758 const char *ix86_align_loops_string;
760 /* Power of two alignment for non-loop jumps. */
761 const char *ix86_align_jumps_string;
763 /* Power of two alignment for stack boundary in bytes. */
764 const char *ix86_preferred_stack_boundary_string;
766 /* Preferred alignment for stack boundary in bits. */
767 int ix86_preferred_stack_boundary;
769 /* Values 1-5: see jump.c */
770 int ix86_branch_cost;
771 const char *ix86_branch_cost_string;
773 /* Power of two alignment for functions. */
774 const char *ix86_align_funcs_string;
776 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
777 static char internal_label_prefix[16];
778 static int internal_label_prefix_len;
780 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
781 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
782 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
783 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
785 static const char *get_some_local_dynamic_name PARAMS ((void));
786 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
787 static rtx maybe_get_pool_constant PARAMS ((rtx));
788 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
789 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
791 static rtx get_thread_pointer PARAMS ((void));
792 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
793 static rtx gen_push PARAMS ((rtx));
794 static int memory_address_length PARAMS ((rtx addr));
795 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
796 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
797 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
798 static void ix86_dump_ppro_packet PARAMS ((FILE *));
799 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
800 static struct machine_function * ix86_init_machine_status PARAMS ((void));
801 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
802 static int ix86_nsaved_regs PARAMS ((void));
803 static void ix86_emit_save_regs PARAMS ((void));
804 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
805 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
806 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
807 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
808 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
809 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
810 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
811 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
812 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
813 static int ix86_issue_rate PARAMS ((void));
814 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
815 static void ix86_sched_init PARAMS ((FILE *, int, int));
816 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
817 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
818 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
819 static int ia32_multipass_dfa_lookahead PARAMS ((void));
820 static void ix86_init_mmx_sse_builtins PARAMS ((void));
821 static rtx x86_this_parameter PARAMS ((tree));
822 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
823 HOST_WIDE_INT, tree));
824 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
825 HOST_WIDE_INT, tree));
826 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
830 rtx base, index, disp;
834 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
835 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
837 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
838 static const char *ix86_strip_name_encoding PARAMS ((const char *))
841 struct builtin_description;
842 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
844 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
846 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
847 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
848 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
849 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
850 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
851 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
852 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
856 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
858 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
859 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
860 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
861 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
862 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
863 static int ix86_save_reg PARAMS ((unsigned int, int));
864 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
865 static int ix86_comp_type_attributes PARAMS ((tree, tree));
866 static int ix86_fntype_regparm PARAMS ((tree));
867 const struct attribute_spec ix86_attribute_table[];
868 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
869 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
870 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
871 static int ix86_value_regno PARAMS ((enum machine_mode));
872 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
873 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
875 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
876 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
879 /* Register class used for passing given 64bit part of the argument.
880 These represent classes as documented by the PS ABI, with the exception
881 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
882 use SF or DFmode move instead of DImode to avoid reformating penalties.
884 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
885 whenever possible (upper half does contain padding).
887 enum x86_64_reg_class
890 X86_64_INTEGER_CLASS,
891 X86_64_INTEGERSI_CLASS,
900 static const char * const x86_64_reg_class_name[] =
901 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
903 #define MAX_CLASSES 4
904 static int classify_argument PARAMS ((enum machine_mode, tree,
905 enum x86_64_reg_class [MAX_CLASSES],
907 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
909 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
911 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
912 enum x86_64_reg_class));
914 /* Initialize the GCC target structure. */
915 #undef TARGET_ATTRIBUTE_TABLE
916 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
917 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
918 # undef TARGET_MERGE_DECL_ATTRIBUTES
919 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
922 #undef TARGET_COMP_TYPE_ATTRIBUTES
923 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
925 #undef TARGET_INIT_BUILTINS
926 #define TARGET_INIT_BUILTINS ix86_init_builtins
928 #undef TARGET_EXPAND_BUILTIN
929 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
931 #undef TARGET_ASM_FUNCTION_EPILOGUE
932 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
934 #undef TARGET_ASM_OPEN_PAREN
935 #define TARGET_ASM_OPEN_PAREN ""
936 #undef TARGET_ASM_CLOSE_PAREN
937 #define TARGET_ASM_CLOSE_PAREN ""
939 #undef TARGET_ASM_ALIGNED_HI_OP
940 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
941 #undef TARGET_ASM_ALIGNED_SI_OP
942 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
944 #undef TARGET_ASM_ALIGNED_DI_OP
945 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
948 #undef TARGET_ASM_UNALIGNED_HI_OP
949 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
950 #undef TARGET_ASM_UNALIGNED_SI_OP
951 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
952 #undef TARGET_ASM_UNALIGNED_DI_OP
953 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
955 #undef TARGET_SCHED_ADJUST_COST
956 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
957 #undef TARGET_SCHED_ISSUE_RATE
958 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
959 #undef TARGET_SCHED_VARIABLE_ISSUE
960 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
961 #undef TARGET_SCHED_INIT
962 #define TARGET_SCHED_INIT ix86_sched_init
963 #undef TARGET_SCHED_REORDER
964 #define TARGET_SCHED_REORDER ix86_sched_reorder
965 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
966 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
967 ia32_use_dfa_pipeline_interface
968 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
969 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
970 ia32_multipass_dfa_lookahead
972 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
973 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
976 #undef TARGET_HAVE_TLS
977 #define TARGET_HAVE_TLS true
979 #undef TARGET_CANNOT_FORCE_CONST_MEM
980 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
982 #undef TARGET_MS_BITFIELD_LAYOUT_P
983 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
985 #undef TARGET_ASM_OUTPUT_MI_THUNK
986 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
987 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
988 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
990 struct gcc_target targetm = TARGET_INITIALIZER;
992 /* Sometimes certain combinations of command options do not make
993 sense on a particular target machine. You can define a macro
994 `OVERRIDE_OPTIONS' to take account of this. This macro, if
995 defined, is executed once just after all the command options have
998 Don't use this macro to turn on various extra optimizations for
999 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1005 /* Comes from final.c -- no real reason to change it. */
1006 #define MAX_CODE_ALIGN 16
1010 const struct processor_costs *cost; /* Processor costs */
1011 const int target_enable; /* Target flags to enable. */
1012 const int target_disable; /* Target flags to disable. */
1013 const int align_loop; /* Default alignments. */
1014 const int align_loop_max_skip;
1015 const int align_jump;
1016 const int align_jump_max_skip;
1017 const int align_func;
1019 const processor_target_table[PROCESSOR_max] =
1021 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1022 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1023 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1024 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1025 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1026 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1027 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1028 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1031 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1034 const char *const name; /* processor name or nickname. */
1035 const enum processor_type processor;
1036 const enum pta_flags
1041 PTA_PREFETCH_SSE = 8,
1047 const processor_alias_table[] =
1049 {"i386", PROCESSOR_I386, 0},
1050 {"i486", PROCESSOR_I486, 0},
1051 {"i586", PROCESSOR_PENTIUM, 0},
1052 {"pentium", PROCESSOR_PENTIUM, 0},
1053 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1054 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1055 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1056 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1057 {"i686", PROCESSOR_PENTIUMPRO, 0},
1058 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1059 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1060 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1061 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1062 PTA_MMX | PTA_PREFETCH_SSE},
1063 {"k6", PROCESSOR_K6, PTA_MMX},
1064 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1065 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1066 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1068 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1069 | PTA_3DNOW | PTA_3DNOW_A},
1070 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1071 | PTA_3DNOW_A | PTA_SSE},
1072 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1073 | PTA_3DNOW_A | PTA_SSE},
1074 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1075 | PTA_3DNOW_A | PTA_SSE},
1076 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1077 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1080 int const pta_size = ARRAY_SIZE (processor_alias_table);
1082 /* By default our XFmode is the 80-bit extended format. If we have
1083 use TFmode instead, it's also the 80-bit format, but with padding. */
1084 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1085 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1087 /* Set the default values for switches whose default depends on TARGET_64BIT
1088 in case they weren't overwriten by command line options. */
1091 if (flag_omit_frame_pointer == 2)
1092 flag_omit_frame_pointer = 1;
1093 if (flag_asynchronous_unwind_tables == 2)
1094 flag_asynchronous_unwind_tables = 1;
1095 if (flag_pcc_struct_return == 2)
1096 flag_pcc_struct_return = 0;
1100 if (flag_omit_frame_pointer == 2)
1101 flag_omit_frame_pointer = 0;
1102 if (flag_asynchronous_unwind_tables == 2)
1103 flag_asynchronous_unwind_tables = 0;
1104 if (flag_pcc_struct_return == 2)
1105 flag_pcc_struct_return = 1;
1108 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1109 SUBTARGET_OVERRIDE_OPTIONS;
1112 if (!ix86_cpu_string && ix86_arch_string)
1113 ix86_cpu_string = ix86_arch_string;
1114 if (!ix86_cpu_string)
1115 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1116 if (!ix86_arch_string)
1117 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1119 if (ix86_cmodel_string != 0)
1121 if (!strcmp (ix86_cmodel_string, "small"))
1122 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1124 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1125 else if (!strcmp (ix86_cmodel_string, "32"))
1126 ix86_cmodel = CM_32;
1127 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1128 ix86_cmodel = CM_KERNEL;
1129 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1130 ix86_cmodel = CM_MEDIUM;
1131 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1132 ix86_cmodel = CM_LARGE;
1134 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1138 ix86_cmodel = CM_32;
1140 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1142 if (ix86_asm_string != 0)
1144 if (!strcmp (ix86_asm_string, "intel"))
1145 ix86_asm_dialect = ASM_INTEL;
1146 else if (!strcmp (ix86_asm_string, "att"))
1147 ix86_asm_dialect = ASM_ATT;
1149 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1151 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1152 error ("code model `%s' not supported in the %s bit mode",
1153 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1154 if (ix86_cmodel == CM_LARGE)
1155 sorry ("code model `large' not supported yet");
1156 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1157 sorry ("%i-bit mode not compiled in",
1158 (target_flags & MASK_64BIT) ? 64 : 32);
1160 for (i = 0; i < pta_size; i++)
1161 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1163 ix86_arch = processor_alias_table[i].processor;
1164 /* Default cpu tuning to the architecture. */
1165 ix86_cpu = ix86_arch;
1166 if (processor_alias_table[i].flags & PTA_MMX
1167 && !(target_flags_explicit & MASK_MMX))
1168 target_flags |= MASK_MMX;
1169 if (processor_alias_table[i].flags & PTA_3DNOW
1170 && !(target_flags_explicit & MASK_3DNOW))
1171 target_flags |= MASK_3DNOW;
1172 if (processor_alias_table[i].flags & PTA_3DNOW_A
1173 && !(target_flags_explicit & MASK_3DNOW_A))
1174 target_flags |= MASK_3DNOW_A;
1175 if (processor_alias_table[i].flags & PTA_SSE
1176 && !(target_flags_explicit & MASK_SSE))
1177 target_flags |= MASK_SSE;
1178 if (processor_alias_table[i].flags & PTA_SSE2
1179 && !(target_flags_explicit & MASK_SSE2))
1180 target_flags |= MASK_SSE2;
1181 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1182 x86_prefetch_sse = true;
1183 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1184 error ("CPU you selected does not support x86-64 instruction set");
1189 error ("bad value (%s) for -march= switch", ix86_arch_string);
1191 for (i = 0; i < pta_size; i++)
1192 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1194 ix86_cpu = processor_alias_table[i].processor;
1195 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1196 error ("CPU you selected does not support x86-64 instruction set");
1199 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1200 x86_prefetch_sse = true;
1202 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1205 ix86_cost = &size_cost;
1207 ix86_cost = processor_target_table[ix86_cpu].cost;
1208 target_flags |= processor_target_table[ix86_cpu].target_enable;
1209 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1211 /* Arrange to set up i386_stack_locals for all functions. */
1212 init_machine_status = ix86_init_machine_status;
1214 /* Validate -mregparm= value. */
1215 if (ix86_regparm_string)
1217 i = atoi (ix86_regparm_string);
1218 if (i < 0 || i > REGPARM_MAX)
1219 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1225 ix86_regparm = REGPARM_MAX;
1227 /* If the user has provided any of the -malign-* options,
1228 warn and use that value only if -falign-* is not set.
1229 Remove this code in GCC 3.2 or later. */
1230 if (ix86_align_loops_string)
1232 warning ("-malign-loops is obsolete, use -falign-loops");
1233 if (align_loops == 0)
1235 i = atoi (ix86_align_loops_string);
1236 if (i < 0 || i > MAX_CODE_ALIGN)
1237 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1239 align_loops = 1 << i;
1243 if (ix86_align_jumps_string)
1245 warning ("-malign-jumps is obsolete, use -falign-jumps");
1246 if (align_jumps == 0)
1248 i = atoi (ix86_align_jumps_string);
1249 if (i < 0 || i > MAX_CODE_ALIGN)
1250 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1252 align_jumps = 1 << i;
1256 if (ix86_align_funcs_string)
1258 warning ("-malign-functions is obsolete, use -falign-functions");
1259 if (align_functions == 0)
1261 i = atoi (ix86_align_funcs_string);
1262 if (i < 0 || i > MAX_CODE_ALIGN)
1263 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1265 align_functions = 1 << i;
1269 /* Default align_* from the processor table. */
1270 if (align_loops == 0)
1272 align_loops = processor_target_table[ix86_cpu].align_loop;
1273 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1275 if (align_jumps == 0)
1277 align_jumps = processor_target_table[ix86_cpu].align_jump;
1278 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1280 if (align_functions == 0)
1282 align_functions = processor_target_table[ix86_cpu].align_func;
1285 /* Validate -mpreferred-stack-boundary= value, or provide default.
1286 The default of 128 bits is for Pentium III's SSE __m128, but we
1287 don't want additional code to keep the stack aligned when
1288 optimizing for code size. */
1289 ix86_preferred_stack_boundary = (optimize_size
1290 ? TARGET_64BIT ? 128 : 32
1292 if (ix86_preferred_stack_boundary_string)
1294 i = atoi (ix86_preferred_stack_boundary_string);
1295 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1296 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1297 TARGET_64BIT ? 4 : 2);
1299 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1302 /* Validate -mbranch-cost= value, or provide default. */
1303 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1304 if (ix86_branch_cost_string)
1306 i = atoi (ix86_branch_cost_string);
1308 error ("-mbranch-cost=%d is not between 0 and 5", i);
1310 ix86_branch_cost = i;
1313 if (ix86_tls_dialect_string)
1315 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1316 ix86_tls_dialect = TLS_DIALECT_GNU;
1317 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1318 ix86_tls_dialect = TLS_DIALECT_SUN;
1320 error ("bad value (%s) for -mtls-dialect= switch",
1321 ix86_tls_dialect_string);
1324 /* Keep nonleaf frame pointers. */
1325 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1326 flag_omit_frame_pointer = 1;
1328 /* If we're doing fast math, we don't care about comparison order
1329 wrt NaNs. This lets us use a shorter comparison sequence. */
1330 if (flag_unsafe_math_optimizations)
1331 target_flags &= ~MASK_IEEE_FP;
1333 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1334 since the insns won't need emulation. */
1335 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1336 target_flags &= ~MASK_NO_FANCY_MATH_387;
1340 if (TARGET_ALIGN_DOUBLE)
1341 error ("-malign-double makes no sense in the 64bit mode");
1343 error ("-mrtd calling convention not supported in the 64bit mode");
1344 /* Enable by default the SSE and MMX builtins. */
1345 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1346 ix86_fpmath = FPMATH_SSE;
1349 ix86_fpmath = FPMATH_387;
1351 if (ix86_fpmath_string != 0)
1353 if (! strcmp (ix86_fpmath_string, "387"))
1354 ix86_fpmath = FPMATH_387;
1355 else if (! strcmp (ix86_fpmath_string, "sse"))
1359 warning ("SSE instruction set disabled, using 387 arithmetics");
1360 ix86_fpmath = FPMATH_387;
1363 ix86_fpmath = FPMATH_SSE;
1365 else if (! strcmp (ix86_fpmath_string, "387,sse")
1366 || ! strcmp (ix86_fpmath_string, "sse,387"))
1370 warning ("SSE instruction set disabled, using 387 arithmetics");
1371 ix86_fpmath = FPMATH_387;
1373 else if (!TARGET_80387)
1375 warning ("387 instruction set disabled, using SSE arithmetics");
1376 ix86_fpmath = FPMATH_SSE;
1379 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1382 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1385 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1389 target_flags |= MASK_MMX;
1390 x86_prefetch_sse = true;
1393 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1396 target_flags |= MASK_MMX;
1397 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1398 extensions it adds. */
1399 if (x86_3dnow_a & (1 << ix86_arch))
1400 target_flags |= MASK_3DNOW_A;
1402 if ((x86_accumulate_outgoing_args & CPUMASK)
1403 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1405 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1407 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1410 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1411 p = strchr (internal_label_prefix, 'X');
1412 internal_label_prefix_len = p - internal_label_prefix;
1418 optimization_options (level, size)
1420 int size ATTRIBUTE_UNUSED;
1422 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1423 make the problem with not enough registers even worse. */
1424 #ifdef INSN_SCHEDULING
1426 flag_schedule_insns = 0;
1429 /* The default values of these switches depend on the TARGET_64BIT
1430 that is not known at this moment. Mark these values with 2 and
1431 let user the to override these. In case there is no command line option
1432 specifying them, we will set the defaults in override_options. */
1434 flag_omit_frame_pointer = 2;
1435 flag_pcc_struct_return = 2;
1436 flag_asynchronous_unwind_tables = 2;
1439 /* Table of valid machine attributes. */
1440 const struct attribute_spec ix86_attribute_table[] =
1442 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1443 /* Stdcall attribute says callee is responsible for popping arguments
1444 if they are not variable. */
1445 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1446 /* Fastcall attribute says callee is responsible for popping arguments
1447 if they are not variable. */
1448 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1449 /* Cdecl attribute says the callee is a normal C declaration */
1450 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1451 /* Regparm attribute specifies how many integer arguments are to be
1452 passed in registers. */
1453 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1454 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1455 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1456 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1457 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1459 { NULL, 0, 0, false, false, false, NULL }
1462 /* If PIC, we cannot make sibling calls to global functions
1463 because the PLT requires %ebx live.
1464 If we are returning floats on the register stack, we cannot make
1465 sibling calls to functions that return floats. (The stack adjust
1466 instruction will wind up after the sibcall jump, and not be executed.) */
1469 ix86_function_ok_for_sibcall (decl, exp)
1473 /* If we are generating position-independent code, we cannot sibcall
1474 optimize any indirect call, or a direct call to a global function,
1475 as the PLT requires %ebx be live. */
1476 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1479 /* If we are returning floats on the 80387 register stack, we cannot
1480 make a sibcall from a function that doesn't return a float to a
1481 function that does; the necessary stack adjustment will not be
1483 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1484 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1487 /* If this call is indirect, we'll need to be able to use a call-clobbered
1488 register for the address of the target function. Make sure that all
1489 such registers are not used for passing parameters. */
1490 if (!decl && !TARGET_64BIT)
1492 int regparm = ix86_regparm;
1495 /* We're looking at the CALL_EXPR, we need the type of the function. */
1496 type = TREE_OPERAND (exp, 0); /* pointer expression */
1497 type = TREE_TYPE (type); /* pointer type */
1498 type = TREE_TYPE (type); /* function type */
1500 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1502 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1506 /* ??? Need to count the actual number of registers to be used,
1507 not the possible number of registers. Fix later. */
1512 /* Otherwise okay. That also includes certain types of indirect calls. */
1516 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1517 arguments as in struct attribute_spec.handler. */
1519 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1522 tree args ATTRIBUTE_UNUSED;
1523 int flags ATTRIBUTE_UNUSED;
1526 if (TREE_CODE (*node) != FUNCTION_TYPE
1527 && TREE_CODE (*node) != METHOD_TYPE
1528 && TREE_CODE (*node) != FIELD_DECL
1529 && TREE_CODE (*node) != TYPE_DECL)
1531 warning ("`%s' attribute only applies to functions",
1532 IDENTIFIER_POINTER (name));
1533 *no_add_attrs = true;
1537 if (is_attribute_p ("fastcall", name))
1539 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1541 error ("fastcall and stdcall attributes are not compatible");
1543 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1545 error ("fastcall and regparm attributes are not compatible");
1548 else if (is_attribute_p ("stdcall", name))
1550 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1552 error ("fastcall and stdcall attributes are not compatible");
1559 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1560 *no_add_attrs = true;
1566 /* Handle a "regparm" attribute;
1567 arguments as in struct attribute_spec.handler. */
1569 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1573 int flags ATTRIBUTE_UNUSED;
1576 if (TREE_CODE (*node) != FUNCTION_TYPE
1577 && TREE_CODE (*node) != METHOD_TYPE
1578 && TREE_CODE (*node) != FIELD_DECL
1579 && TREE_CODE (*node) != TYPE_DECL)
1581 warning ("`%s' attribute only applies to functions",
1582 IDENTIFIER_POINTER (name));
1583 *no_add_attrs = true;
1589 cst = TREE_VALUE (args);
1590 if (TREE_CODE (cst) != INTEGER_CST)
1592 warning ("`%s' attribute requires an integer constant argument",
1593 IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1596 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1598 warning ("argument to `%s' attribute larger than %d",
1599 IDENTIFIER_POINTER (name), REGPARM_MAX);
1600 *no_add_attrs = true;
1603 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1605 error ("fastcall and regparm attributes are not compatible");
1612 /* Return 0 if the attributes for two types are incompatible, 1 if they
1613 are compatible, and 2 if they are nearly compatible (which causes a
1614 warning to be generated). */
1617 ix86_comp_type_attributes (type1, type2)
1621 /* Check for mismatch of non-default calling convention. */
1622 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1624 if (TREE_CODE (type1) != FUNCTION_TYPE)
1627 /* Check for mismatched fastcall types */
1628 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1629 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1632 /* Check for mismatched return types (cdecl vs stdcall). */
1633 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1634 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1639 /* Return the regparm value for a fuctio with the indicated TYPE. */
1642 ix86_fntype_regparm (type)
1647 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1649 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1651 return ix86_regparm;
1654 /* Value is the number of bytes of arguments automatically
1655 popped when returning from a subroutine call.
1656 FUNDECL is the declaration node of the function (as a tree),
1657 FUNTYPE is the data type of the function (as a tree),
1658 or for a library call it is an identifier node for the subroutine name.
1659 SIZE is the number of bytes of arguments passed on the stack.
1661 On the 80386, the RTD insn may be used to pop them if the number
1662 of args is fixed, but if the number is variable then the caller
1663 must pop them all. RTD can't be used for library calls now
1664 because the library is compiled with the Unix compiler.
1665 Use of RTD is a selectable option, since it is incompatible with
1666 standard Unix calling sequences. If the option is not selected,
1667 the caller must always pop the args.
1669 The attribute stdcall is equivalent to RTD on a per module basis. */
1672 ix86_return_pops_args (fundecl, funtype, size)
1677 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1679 /* Cdecl functions override -mrtd, and never pop the stack. */
1680 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1682 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1683 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1684 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1688 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1689 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1690 == void_type_node)))
1694 /* Lose any fake structure return argument if it is passed on the stack. */
1695 if (aggregate_value_p (TREE_TYPE (funtype))
1698 int nregs = ix86_fntype_regparm (funtype);
1701 return GET_MODE_SIZE (Pmode);
1707 /* Argument support functions. */
1709 /* Return true when register may be used to pass function parameters. */
1711 ix86_function_arg_regno_p (regno)
1716 return (regno < REGPARM_MAX
1717 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1718 if (SSE_REGNO_P (regno) && TARGET_SSE)
1720 /* RAX is used as hidden argument to va_arg functions. */
1723 for (i = 0; i < REGPARM_MAX; i++)
1724 if (regno == x86_64_int_parameter_registers[i])
1729 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1730 for a call to a function whose data type is FNTYPE.
1731 For a library call, FNTYPE is 0. */
1734 init_cumulative_args (cum, fntype, libname)
1735 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1736 tree fntype; /* tree ptr for function decl */
1737 rtx libname; /* SYMBOL_REF of library name or 0 */
1739 static CUMULATIVE_ARGS zero_cum;
1740 tree param, next_param;
1742 if (TARGET_DEBUG_ARG)
1744 fprintf (stderr, "\ninit_cumulative_args (");
1746 fprintf (stderr, "fntype code = %s, ret code = %s",
1747 tree_code_name[(int) TREE_CODE (fntype)],
1748 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1750 fprintf (stderr, "no fntype");
1753 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1758 /* Set up the number of registers to use for passing arguments. */
1759 cum->nregs = ix86_regparm;
1760 cum->sse_nregs = SSE_REGPARM_MAX;
1761 if (fntype && !TARGET_64BIT)
1763 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1766 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1768 cum->maybe_vaarg = false;
1770 /* Use ecx and edx registers if function has fastcall attribute */
1771 if (fntype && !TARGET_64BIT)
1773 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1781 /* Determine if this function has variable arguments. This is
1782 indicated by the last argument being 'void_type_mode' if there
1783 are no variable arguments. If there are variable arguments, then
1784 we won't pass anything in registers */
1788 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1789 param != 0; param = next_param)
1791 next_param = TREE_CHAIN (param);
1792 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1799 cum->maybe_vaarg = true;
1803 if ((!fntype && !libname)
1804 || (fntype && !TYPE_ARG_TYPES (fntype)))
1805 cum->maybe_vaarg = 1;
1807 if (TARGET_DEBUG_ARG)
1808 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1813 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1814 of this code is to classify each 8bytes of incoming argument by the register
1815 class and assign registers accordingly. */
1817 /* Return the union class of CLASS1 and CLASS2.
1818 See the x86-64 PS ABI for details. */
1820 static enum x86_64_reg_class
1821 merge_classes (class1, class2)
1822 enum x86_64_reg_class class1, class2;
1824 /* Rule #1: If both classes are equal, this is the resulting class. */
1825 if (class1 == class2)
1828 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1830 if (class1 == X86_64_NO_CLASS)
1832 if (class2 == X86_64_NO_CLASS)
1835 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1836 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1837 return X86_64_MEMORY_CLASS;
1839 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1840 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1841 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1842 return X86_64_INTEGERSI_CLASS;
1843 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1844 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1845 return X86_64_INTEGER_CLASS;
1847 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1848 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1849 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1850 return X86_64_MEMORY_CLASS;
1852 /* Rule #6: Otherwise class SSE is used. */
1853 return X86_64_SSE_CLASS;
1856 /* Classify the argument of type TYPE and mode MODE.
1857 CLASSES will be filled by the register class used to pass each word
1858 of the operand. The number of words is returned. In case the parameter
1859 should be passed in memory, 0 is returned. As a special case for zero
1860 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1862 BIT_OFFSET is used internally for handling records and specifies offset
1863 of the offset in bits modulo 256 to avoid overflow cases.
1865 See the x86-64 PS ABI for details.
1869 classify_argument (mode, type, classes, bit_offset)
1870 enum machine_mode mode;
1872 enum x86_64_reg_class classes[MAX_CLASSES];
1876 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1877 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1879 /* Variable sized entities are always passed/returned in memory. */
1883 if (type && AGGREGATE_TYPE_P (type))
1887 enum x86_64_reg_class subclasses[MAX_CLASSES];
1889 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1893 for (i = 0; i < words; i++)
1894 classes[i] = X86_64_NO_CLASS;
1896 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1897 signalize memory class, so handle it as special case. */
1900 classes[0] = X86_64_NO_CLASS;
1904 /* Classify each field of record and merge classes. */
1905 if (TREE_CODE (type) == RECORD_TYPE)
1907 /* For classes first merge in the field of the subclasses. */
1908 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1910 tree bases = TYPE_BINFO_BASETYPES (type);
1911 int n_bases = TREE_VEC_LENGTH (bases);
1914 for (i = 0; i < n_bases; ++i)
1916 tree binfo = TREE_VEC_ELT (bases, i);
1918 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1919 tree type = BINFO_TYPE (binfo);
1921 num = classify_argument (TYPE_MODE (type),
1923 (offset + bit_offset) % 256);
1926 for (i = 0; i < num; i++)
1928 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1930 merge_classes (subclasses[i], classes[i + pos]);
1934 /* And now merge the fields of structure. */
1935 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1937 if (TREE_CODE (field) == FIELD_DECL)
1941 /* Bitfields are always classified as integer. Handle them
1942 early, since later code would consider them to be
1943 misaligned integers. */
1944 if (DECL_BIT_FIELD (field))
1946 for (i = int_bit_position (field) / 8 / 8;
1947 i < (int_bit_position (field)
1948 + tree_low_cst (DECL_SIZE (field), 0)
1951 merge_classes (X86_64_INTEGER_CLASS,
1956 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1957 TREE_TYPE (field), subclasses,
1958 (int_bit_position (field)
1959 + bit_offset) % 256);
1962 for (i = 0; i < num; i++)
1965 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1967 merge_classes (subclasses[i], classes[i + pos]);
1973 /* Arrays are handled as small records. */
1974 else if (TREE_CODE (type) == ARRAY_TYPE)
1977 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1978 TREE_TYPE (type), subclasses, bit_offset);
1982 /* The partial classes are now full classes. */
1983 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1984 subclasses[0] = X86_64_SSE_CLASS;
1985 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1986 subclasses[0] = X86_64_INTEGER_CLASS;
1988 for (i = 0; i < words; i++)
1989 classes[i] = subclasses[i % num];
1991 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1992 else if (TREE_CODE (type) == UNION_TYPE
1993 || TREE_CODE (type) == QUAL_UNION_TYPE)
1995 /* For classes first merge in the field of the subclasses. */
1996 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1998 tree bases = TYPE_BINFO_BASETYPES (type);
1999 int n_bases = TREE_VEC_LENGTH (bases);
2002 for (i = 0; i < n_bases; ++i)
2004 tree binfo = TREE_VEC_ELT (bases, i);
2006 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2007 tree type = BINFO_TYPE (binfo);
2009 num = classify_argument (TYPE_MODE (type),
2011 (offset + (bit_offset % 64)) % 256);
2014 for (i = 0; i < num; i++)
2016 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2018 merge_classes (subclasses[i], classes[i + pos]);
2022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2024 if (TREE_CODE (field) == FIELD_DECL)
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2028 TREE_TYPE (field), subclasses,
2032 for (i = 0; i < num; i++)
2033 classes[i] = merge_classes (subclasses[i], classes[i]);
2040 /* Final merger cleanup. */
2041 for (i = 0; i < words; i++)
2043 /* If one class is MEMORY, everything should be passed in
2045 if (classes[i] == X86_64_MEMORY_CLASS)
2048 /* The X86_64_SSEUP_CLASS should be always preceded by
2049 X86_64_SSE_CLASS. */
2050 if (classes[i] == X86_64_SSEUP_CLASS
2051 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2052 classes[i] = X86_64_SSE_CLASS;
2054 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2055 if (classes[i] == X86_64_X87UP_CLASS
2056 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2057 classes[i] = X86_64_SSE_CLASS;
2062 /* Compute alignment needed. We align all types to natural boundaries with
2063 exception of XFmode that is aligned to 64bits. */
2064 if (mode != VOIDmode && mode != BLKmode)
2066 int mode_alignment = GET_MODE_BITSIZE (mode);
2069 mode_alignment = 128;
2070 else if (mode == XCmode)
2071 mode_alignment = 256;
2072 /* Misaligned fields are always returned in memory. */
2073 if (bit_offset % mode_alignment)
2077 /* Classification of atomic types. */
2087 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2088 classes[0] = X86_64_INTEGERSI_CLASS;
2090 classes[0] = X86_64_INTEGER_CLASS;
2094 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2097 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2098 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2101 if (!(bit_offset % 64))
2102 classes[0] = X86_64_SSESF_CLASS;
2104 classes[0] = X86_64_SSE_CLASS;
2107 classes[0] = X86_64_SSEDF_CLASS;
2110 classes[0] = X86_64_X87_CLASS;
2111 classes[1] = X86_64_X87UP_CLASS;
2114 classes[0] = X86_64_X87_CLASS;
2115 classes[1] = X86_64_X87UP_CLASS;
2116 classes[2] = X86_64_X87_CLASS;
2117 classes[3] = X86_64_X87UP_CLASS;
2120 classes[0] = X86_64_SSEDF_CLASS;
2121 classes[1] = X86_64_SSEDF_CLASS;
2124 classes[0] = X86_64_SSE_CLASS;
2132 classes[0] = X86_64_SSE_CLASS;
2133 classes[1] = X86_64_SSEUP_CLASS;
2148 /* Examine the argument and return set number of register required in each
2149 class. Return 0 iff parameter should be passed in memory. */
2151 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2152 enum machine_mode mode;
2154 int *int_nregs, *sse_nregs;
2157 enum x86_64_reg_class class[MAX_CLASSES];
2158 int n = classify_argument (mode, type, class, 0);
2164 for (n--; n >= 0; n--)
2167 case X86_64_INTEGER_CLASS:
2168 case X86_64_INTEGERSI_CLASS:
2171 case X86_64_SSE_CLASS:
2172 case X86_64_SSESF_CLASS:
2173 case X86_64_SSEDF_CLASS:
2176 case X86_64_NO_CLASS:
2177 case X86_64_SSEUP_CLASS:
2179 case X86_64_X87_CLASS:
2180 case X86_64_X87UP_CLASS:
2184 case X86_64_MEMORY_CLASS:
2189 /* Construct container for the argument used by GCC interface. See
2190 FUNCTION_ARG for the detailed description. */
2192 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2193 enum machine_mode mode;
2196 int nintregs, nsseregs;
2200 enum machine_mode tmpmode;
2202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2203 enum x86_64_reg_class class[MAX_CLASSES];
2207 int needed_sseregs, needed_intregs;
2208 rtx exp[MAX_CLASSES];
2211 n = classify_argument (mode, type, class, 0);
2212 if (TARGET_DEBUG_ARG)
2215 fprintf (stderr, "Memory class\n");
2218 fprintf (stderr, "Classes:");
2219 for (i = 0; i < n; i++)
2221 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2223 fprintf (stderr, "\n");
2228 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2230 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2233 /* First construct simple cases. Avoid SCmode, since we want to use
2234 single register to pass this type. */
2235 if (n == 1 && mode != SCmode)
2238 case X86_64_INTEGER_CLASS:
2239 case X86_64_INTEGERSI_CLASS:
2240 return gen_rtx_REG (mode, intreg[0]);
2241 case X86_64_SSE_CLASS:
2242 case X86_64_SSESF_CLASS:
2243 case X86_64_SSEDF_CLASS:
2244 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2245 case X86_64_X87_CLASS:
2246 return gen_rtx_REG (mode, FIRST_STACK_REG);
2247 case X86_64_NO_CLASS:
2248 /* Zero sized array, struct or class. */
2253 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2254 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2256 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2257 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2258 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2259 && class[1] == X86_64_INTEGER_CLASS
2260 && (mode == CDImode || mode == TImode)
2261 && intreg[0] + 1 == intreg[1])
2262 return gen_rtx_REG (mode, intreg[0]);
2264 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2265 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2266 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2268 /* Otherwise figure out the entries of the PARALLEL. */
2269 for (i = 0; i < n; i++)
2273 case X86_64_NO_CLASS:
2275 case X86_64_INTEGER_CLASS:
2276 case X86_64_INTEGERSI_CLASS:
2277 /* Merge TImodes on aligned occassions here too. */
2278 if (i * 8 + 8 > bytes)
2279 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2280 else if (class[i] == X86_64_INTEGERSI_CLASS)
2284 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2285 if (tmpmode == BLKmode)
2287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2288 gen_rtx_REG (tmpmode, *intreg),
2292 case X86_64_SSESF_CLASS:
2293 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2294 gen_rtx_REG (SFmode,
2295 SSE_REGNO (sse_regno)),
2299 case X86_64_SSEDF_CLASS:
2300 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2301 gen_rtx_REG (DFmode,
2302 SSE_REGNO (sse_regno)),
2306 case X86_64_SSE_CLASS:
2307 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2311 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2312 gen_rtx_REG (tmpmode,
2313 SSE_REGNO (sse_regno)),
2315 if (tmpmode == TImode)
2323 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2324 for (i = 0; i < nexps; i++)
2325 XVECEXP (ret, 0, i) = exp [i];
2329 /* Update the data in CUM to advance over an argument
2330 of mode MODE and data type TYPE.
2331 (TYPE is null for libcalls where that information may not be available.) */
2334 function_arg_advance (cum, mode, type, named)
2335 CUMULATIVE_ARGS *cum; /* current arg information */
2336 enum machine_mode mode; /* current arg mode */
2337 tree type; /* type of the argument or 0 if lib support */
2338 int named; /* whether or not the argument was named */
2341 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2342 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2344 if (TARGET_DEBUG_ARG)
2346 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2347 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2350 int int_nregs, sse_nregs;
2351 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2352 cum->words += words;
2353 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2355 cum->nregs -= int_nregs;
2356 cum->sse_nregs -= sse_nregs;
2357 cum->regno += int_nregs;
2358 cum->sse_regno += sse_nregs;
2361 cum->words += words;
2365 if (TARGET_SSE && mode == TImode)
2367 cum->sse_words += words;
2368 cum->sse_nregs -= 1;
2369 cum->sse_regno += 1;
2370 if (cum->sse_nregs <= 0)
2378 cum->words += words;
2379 cum->nregs -= words;
2380 cum->regno += words;
2382 if (cum->nregs <= 0)
2392 /* Define where to put the arguments to a function.
2393 Value is zero to push the argument on the stack,
2394 or a hard register in which to store the argument.
2396 MODE is the argument's machine mode.
2397 TYPE is the data type of the argument (as a tree).
2398 This is null for libcalls where that information may
2400 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2401 the preceding args and about the function being called.
2402 NAMED is nonzero if this argument is a named parameter
2403 (otherwise it is an extra parameter matching an ellipsis). */
2406 function_arg (cum, mode, type, named)
2407 CUMULATIVE_ARGS *cum; /* current arg information */
2408 enum machine_mode mode; /* current arg mode */
2409 tree type; /* type of the argument or 0 if lib support */
2410 int named; /* != 0 for normal args, == 0 for ... args */
2414 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2415 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2417 /* Handle an hidden AL argument containing number of registers for varargs
2418 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2420 if (mode == VOIDmode)
2423 return GEN_INT (cum->maybe_vaarg
2424 ? (cum->sse_nregs < 0
2432 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2433 &x86_64_int_parameter_registers [cum->regno],
2438 /* For now, pass fp/complex values on the stack. */
2447 if (words <= cum->nregs)
2449 int regno = cum->regno;
2451 /* Fastcall allocates the first two DWORD (SImode) or
2452 smaller arguments to ECX and EDX. */
2455 if (mode == BLKmode || mode == DImode)
2458 /* ECX not EAX is the first allocated register. */
2462 ret = gen_rtx_REG (mode, regno);
2467 ret = gen_rtx_REG (mode, cum->sse_regno);
2471 if (TARGET_DEBUG_ARG)
2474 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2475 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2478 print_simple_rtl (stderr, ret);
2480 fprintf (stderr, ", stack");
2482 fprintf (stderr, " )\n");
2488 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2492 ix86_function_arg_boundary (mode, type)
2493 enum machine_mode mode;
2498 return PARM_BOUNDARY;
2500 align = TYPE_ALIGN (type);
2502 align = GET_MODE_ALIGNMENT (mode);
2503 if (align < PARM_BOUNDARY)
2504 align = PARM_BOUNDARY;
2510 /* Return true if N is a possible register number of function value. */
2512 ix86_function_value_regno_p (regno)
2517 return ((regno) == 0
2518 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2519 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2521 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2522 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2523 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2526 /* Define how to find the value returned by a function.
2527 VALTYPE is the data type of the value (as a tree).
2528 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2529 otherwise, FUNC is 0. */
2531 ix86_function_value (valtype)
2536 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2537 REGPARM_MAX, SSE_REGPARM_MAX,
2538 x86_64_int_return_registers, 0);
2539 /* For zero sized structures, construct_continer return NULL, but we need
2540 to keep rest of compiler happy by returning meaningfull value. */
2542 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2546 return gen_rtx_REG (TYPE_MODE (valtype),
2547 ix86_value_regno (TYPE_MODE (valtype)));
2550 /* Return false iff type is returned in memory. */
2552 ix86_return_in_memory (type)
2555 int needed_intregs, needed_sseregs;
2558 return !examine_argument (TYPE_MODE (type), type, 1,
2559 &needed_intregs, &needed_sseregs);
2563 if (TYPE_MODE (type) == BLKmode
2564 || (VECTOR_MODE_P (TYPE_MODE (type))
2565 && int_size_in_bytes (type) == 8)
2566 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2567 && TYPE_MODE (type) != TFmode
2568 && !VECTOR_MODE_P (TYPE_MODE (type))))
2574 /* Define how to find the value returned by a library function
2575 assuming the value has mode MODE. */
2577 ix86_libcall_value (mode)
2578 enum machine_mode mode;
2588 return gen_rtx_REG (mode, FIRST_SSE_REG);
2591 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2593 return gen_rtx_REG (mode, 0);
2597 return gen_rtx_REG (mode, ix86_value_regno (mode));
2600 /* Given a mode, return the register to use for a return value. */
2603 ix86_value_regno (mode)
2604 enum machine_mode mode;
2606 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2607 return FIRST_FLOAT_REG;
2608 if (mode == TImode || VECTOR_MODE_P (mode))
2609 return FIRST_SSE_REG;
2613 /* Create the va_list data type. */
2616 ix86_build_va_list ()
2618 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2620 /* For i386 we use plain pointer to argument area. */
2622 return build_pointer_type (char_type_node);
2624 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2625 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2627 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2628 unsigned_type_node);
2629 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2630 unsigned_type_node);
2631 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2633 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2636 DECL_FIELD_CONTEXT (f_gpr) = record;
2637 DECL_FIELD_CONTEXT (f_fpr) = record;
2638 DECL_FIELD_CONTEXT (f_ovf) = record;
2639 DECL_FIELD_CONTEXT (f_sav) = record;
2641 TREE_CHAIN (record) = type_decl;
2642 TYPE_NAME (record) = type_decl;
2643 TYPE_FIELDS (record) = f_gpr;
2644 TREE_CHAIN (f_gpr) = f_fpr;
2645 TREE_CHAIN (f_fpr) = f_ovf;
2646 TREE_CHAIN (f_ovf) = f_sav;
2648 layout_type (record);
2650 /* The correct type is an array type of one element. */
2651 return build_array_type (record, build_index_type (size_zero_node));
2654 /* Perform any needed actions needed for a function that is receiving a
2655 variable number of arguments.
2659 MODE and TYPE are the mode and type of the current parameter.
2661 PRETEND_SIZE is a variable that should be set to the amount of stack
2662 that must be pushed by the prolog to pretend that our caller pushed
2665 Normally, this macro will push all remaining incoming registers on the
2666 stack and set PRETEND_SIZE to the length of the registers pushed. */
2669 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2670 CUMULATIVE_ARGS *cum;
2671 enum machine_mode mode;
2673 int *pretend_size ATTRIBUTE_UNUSED;
2677 CUMULATIVE_ARGS next_cum;
2678 rtx save_area = NULL_RTX, mem;
2691 /* Indicate to allocate space on the stack for varargs save area. */
2692 ix86_save_varrargs_registers = 1;
2694 fntype = TREE_TYPE (current_function_decl);
2695 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2696 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2697 != void_type_node));
2699 /* For varargs, we do not want to skip the dummy va_dcl argument.
2700 For stdargs, we do want to skip the last named argument. */
2703 function_arg_advance (&next_cum, mode, type, 1);
2706 save_area = frame_pointer_rtx;
2708 set = get_varargs_alias_set ();
2710 for (i = next_cum.regno; i < ix86_regparm; i++)
2712 mem = gen_rtx_MEM (Pmode,
2713 plus_constant (save_area, i * UNITS_PER_WORD));
2714 set_mem_alias_set (mem, set);
2715 emit_move_insn (mem, gen_rtx_REG (Pmode,
2716 x86_64_int_parameter_registers[i]));
2719 if (next_cum.sse_nregs)
2721 /* Now emit code to save SSE registers. The AX parameter contains number
2722 of SSE parameter regsiters used to call this function. We use
2723 sse_prologue_save insn template that produces computed jump across
2724 SSE saves. We need some preparation work to get this working. */
2726 label = gen_label_rtx ();
2727 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2729 /* Compute address to jump to :
2730 label - 5*eax + nnamed_sse_arguments*5 */
2731 tmp_reg = gen_reg_rtx (Pmode);
2732 nsse_reg = gen_reg_rtx (Pmode);
2733 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2734 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2735 gen_rtx_MULT (Pmode, nsse_reg,
2737 if (next_cum.sse_regno)
2740 gen_rtx_CONST (DImode,
2741 gen_rtx_PLUS (DImode,
2743 GEN_INT (next_cum.sse_regno * 4))));
2745 emit_move_insn (nsse_reg, label_ref);
2746 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2748 /* Compute address of memory block we save into. We always use pointer
2749 pointing 127 bytes after first byte to store - this is needed to keep
2750 instruction size limited by 4 bytes. */
2751 tmp_reg = gen_reg_rtx (Pmode);
2752 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2753 plus_constant (save_area,
2754 8 * REGPARM_MAX + 127)));
2755 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2756 set_mem_alias_set (mem, set);
2757 set_mem_align (mem, BITS_PER_WORD);
2759 /* And finally do the dirty job! */
2760 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2761 GEN_INT (next_cum.sse_regno), label));
2766 /* Implement va_start. */
2769 ix86_va_start (valist, nextarg)
2773 HOST_WIDE_INT words, n_gpr, n_fpr;
2774 tree f_gpr, f_fpr, f_ovf, f_sav;
2775 tree gpr, fpr, ovf, sav, t;
2777 /* Only 64bit target needs something special. */
2780 std_expand_builtin_va_start (valist, nextarg);
2784 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2785 f_fpr = TREE_CHAIN (f_gpr);
2786 f_ovf = TREE_CHAIN (f_fpr);
2787 f_sav = TREE_CHAIN (f_ovf);
2789 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2790 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2791 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2792 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2793 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2795 /* Count number of gp and fp argument registers used. */
2796 words = current_function_args_info.words;
2797 n_gpr = current_function_args_info.regno;
2798 n_fpr = current_function_args_info.sse_regno;
2800 if (TARGET_DEBUG_ARG)
2801 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2802 (int) words, (int) n_gpr, (int) n_fpr);
2804 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2805 build_int_2 (n_gpr * 8, 0));
2806 TREE_SIDE_EFFECTS (t) = 1;
2807 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2809 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2810 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2811 TREE_SIDE_EFFECTS (t) = 1;
2812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2814 /* Find the overflow area. */
2815 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2817 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2818 build_int_2 (words * UNITS_PER_WORD, 0));
2819 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2820 TREE_SIDE_EFFECTS (t) = 1;
2821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2823 /* Find the register save area.
2824 Prologue of the function save it right above stack frame. */
2825 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2826 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2827 TREE_SIDE_EFFECTS (t) = 1;
2828 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2831 /* Implement va_arg. */
2833 ix86_va_arg (valist, type)
2836 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2837 tree f_gpr, f_fpr, f_ovf, f_sav;
2838 tree gpr, fpr, ovf, sav, t;
2840 rtx lab_false, lab_over = NULL_RTX;
2844 /* Only 64bit target needs something special. */
2847 return std_expand_builtin_va_arg (valist, type);
2850 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2851 f_fpr = TREE_CHAIN (f_gpr);
2852 f_ovf = TREE_CHAIN (f_fpr);
2853 f_sav = TREE_CHAIN (f_ovf);
2855 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2856 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2857 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2858 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2859 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2861 size = int_size_in_bytes (type);
2862 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2864 container = construct_container (TYPE_MODE (type), type, 0,
2865 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2867 * Pull the value out of the saved registers ...
2870 addr_rtx = gen_reg_rtx (Pmode);
2874 rtx int_addr_rtx, sse_addr_rtx;
2875 int needed_intregs, needed_sseregs;
2878 lab_over = gen_label_rtx ();
2879 lab_false = gen_label_rtx ();
2881 examine_argument (TYPE_MODE (type), type, 0,
2882 &needed_intregs, &needed_sseregs);
2885 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2886 || TYPE_ALIGN (type) > 128);
2888 /* In case we are passing structure, verify that it is consetuctive block
2889 on the register save area. If not we need to do moves. */
2890 if (!need_temp && !REG_P (container))
2892 /* Verify that all registers are strictly consetuctive */
2893 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2897 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2899 rtx slot = XVECEXP (container, 0, i);
2900 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2901 || INTVAL (XEXP (slot, 1)) != i * 16)
2909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2911 rtx slot = XVECEXP (container, 0, i);
2912 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2913 || INTVAL (XEXP (slot, 1)) != i * 8)
2920 int_addr_rtx = addr_rtx;
2921 sse_addr_rtx = addr_rtx;
2925 int_addr_rtx = gen_reg_rtx (Pmode);
2926 sse_addr_rtx = gen_reg_rtx (Pmode);
2928 /* First ensure that we fit completely in registers. */
2931 emit_cmp_and_jump_insns (expand_expr
2932 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2933 GEN_INT ((REGPARM_MAX - needed_intregs +
2934 1) * 8), GE, const1_rtx, SImode,
2939 emit_cmp_and_jump_insns (expand_expr
2940 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2941 GEN_INT ((SSE_REGPARM_MAX -
2942 needed_sseregs + 1) * 16 +
2943 REGPARM_MAX * 8), GE, const1_rtx,
2944 SImode, 1, lab_false);
2947 /* Compute index to start of area used for integer regs. */
2950 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2951 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2952 if (r != int_addr_rtx)
2953 emit_move_insn (int_addr_rtx, r);
2957 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2958 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2959 if (r != sse_addr_rtx)
2960 emit_move_insn (sse_addr_rtx, r);
2967 /* Never use the memory itself, as it has the alias set. */
2968 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2969 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2970 set_mem_alias_set (mem, get_varargs_alias_set ());
2971 set_mem_align (mem, BITS_PER_UNIT);
2973 for (i = 0; i < XVECLEN (container, 0); i++)
2975 rtx slot = XVECEXP (container, 0, i);
2976 rtx reg = XEXP (slot, 0);
2977 enum machine_mode mode = GET_MODE (reg);
2983 if (SSE_REGNO_P (REGNO (reg)))
2985 src_addr = sse_addr_rtx;
2986 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2990 src_addr = int_addr_rtx;
2991 src_offset = REGNO (reg) * 8;
2993 src_mem = gen_rtx_MEM (mode, src_addr);
2994 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2995 src_mem = adjust_address (src_mem, mode, src_offset);
2996 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2997 emit_move_insn (dest_mem, src_mem);
3004 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3005 build_int_2 (needed_intregs * 8, 0));
3006 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3007 TREE_SIDE_EFFECTS (t) = 1;
3008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3013 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3014 build_int_2 (needed_sseregs * 16, 0));
3015 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3016 TREE_SIDE_EFFECTS (t) = 1;
3017 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3020 emit_jump_insn (gen_jump (lab_over));
3022 emit_label (lab_false);
3025 /* ... otherwise out of the overflow area. */
3027 /* Care for on-stack alignment if needed. */
3028 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3032 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3033 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3034 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3038 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3040 emit_move_insn (addr_rtx, r);
3043 build (PLUS_EXPR, TREE_TYPE (t), t,
3044 build_int_2 (rsize * UNITS_PER_WORD, 0));
3045 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3046 TREE_SIDE_EFFECTS (t) = 1;
3047 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3050 emit_label (lab_over);
3055 /* Return nonzero if OP is either a i387 or SSE fp register. */
3057 any_fp_register_operand (op, mode)
3059 enum machine_mode mode ATTRIBUTE_UNUSED;
3061 return ANY_FP_REG_P (op);
3064 /* Return nonzero if OP is an i387 fp register. */
3066 fp_register_operand (op, mode)
3068 enum machine_mode mode ATTRIBUTE_UNUSED;
3070 return FP_REG_P (op);
3073 /* Return nonzero if OP is a non-fp register_operand. */
3075 register_and_not_any_fp_reg_operand (op, mode)
3077 enum machine_mode mode;
3079 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3082 /* Return nonzero of OP is a register operand other than an
3083 i387 fp register. */
3085 register_and_not_fp_reg_operand (op, mode)
3087 enum machine_mode mode;
3089 return register_operand (op, mode) && !FP_REG_P (op);
3092 /* Return nonzero if OP is general operand representable on x86_64. */
3095 x86_64_general_operand (op, mode)
3097 enum machine_mode mode;
3100 return general_operand (op, mode);
3101 if (nonimmediate_operand (op, mode))
3103 return x86_64_sign_extended_value (op);
3106 /* Return nonzero if OP is general operand representable on x86_64
3107 as either sign extended or zero extended constant. */
3110 x86_64_szext_general_operand (op, mode)
3112 enum machine_mode mode;
3115 return general_operand (op, mode);
3116 if (nonimmediate_operand (op, mode))
3118 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3121 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3124 x86_64_nonmemory_operand (op, mode)
3126 enum machine_mode mode;
3129 return nonmemory_operand (op, mode);
3130 if (register_operand (op, mode))
3132 return x86_64_sign_extended_value (op);
3135 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3138 x86_64_movabs_operand (op, mode)
3140 enum machine_mode mode;
3142 if (!TARGET_64BIT || !flag_pic)
3143 return nonmemory_operand (op, mode);
3144 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3146 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3151 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3154 x86_64_szext_nonmemory_operand (op, mode)
3156 enum machine_mode mode;
3159 return nonmemory_operand (op, mode);
3160 if (register_operand (op, mode))
3162 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3165 /* Return nonzero if OP is immediate operand representable on x86_64. */
3168 x86_64_immediate_operand (op, mode)
3170 enum machine_mode mode;
3173 return immediate_operand (op, mode);
3174 return x86_64_sign_extended_value (op);
3177 /* Return nonzero if OP is immediate operand representable on x86_64. */
3180 x86_64_zext_immediate_operand (op, mode)
3182 enum machine_mode mode ATTRIBUTE_UNUSED;
3184 return x86_64_zero_extended_value (op);
3187 /* Return nonzero if OP is (const_int 1), else return zero. */
3190 const_int_1_operand (op, mode)
3192 enum machine_mode mode ATTRIBUTE_UNUSED;
3194 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3197 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3198 for shift & compare patterns, as shifting by 0 does not change flags),
3199 else return zero. */
3202 const_int_1_31_operand (op, mode)
3204 enum machine_mode mode ATTRIBUTE_UNUSED;
3206 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3209 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3210 reference and a constant. */
3213 symbolic_operand (op, mode)
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3217 switch (GET_CODE (op))
3225 if (GET_CODE (op) == SYMBOL_REF
3226 || GET_CODE (op) == LABEL_REF
3227 || (GET_CODE (op) == UNSPEC
3228 && (XINT (op, 1) == UNSPEC_GOT
3229 || XINT (op, 1) == UNSPEC_GOTOFF
3230 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3232 if (GET_CODE (op) != PLUS
3233 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3237 if (GET_CODE (op) == SYMBOL_REF
3238 || GET_CODE (op) == LABEL_REF)
3240 /* Only @GOTOFF gets offsets. */
3241 if (GET_CODE (op) != UNSPEC
3242 || XINT (op, 1) != UNSPEC_GOTOFF)
3245 op = XVECEXP (op, 0, 0);
3246 if (GET_CODE (op) == SYMBOL_REF
3247 || GET_CODE (op) == LABEL_REF)
3256 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3259 pic_symbolic_operand (op, mode)
3261 enum machine_mode mode ATTRIBUTE_UNUSED;
3263 if (GET_CODE (op) != CONST)
3268 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3273 if (GET_CODE (op) == UNSPEC)
3275 if (GET_CODE (op) != PLUS
3276 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3279 if (GET_CODE (op) == UNSPEC)
3285 /* Return true if OP is a symbolic operand that resolves locally. */
3288 local_symbolic_operand (op, mode)
3290 enum machine_mode mode ATTRIBUTE_UNUSED;
3292 if (GET_CODE (op) == CONST
3293 && GET_CODE (XEXP (op, 0)) == PLUS
3294 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3295 op = XEXP (XEXP (op, 0), 0);
3297 if (GET_CODE (op) == LABEL_REF)
3300 if (GET_CODE (op) != SYMBOL_REF)
3303 /* These we've been told are local by varasm and encode_section_info
3305 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3308 /* There is, however, a not insubstantial body of code in the rest of
3309 the compiler that assumes it can just stick the results of
3310 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3311 /* ??? This is a hack. Should update the body of the compiler to
3312 always create a DECL an invoke targetm.encode_section_info. */
3313 if (strncmp (XSTR (op, 0), internal_label_prefix,
3314 internal_label_prefix_len) == 0)
3320 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3323 tls_symbolic_operand (op, mode)
3325 enum machine_mode mode ATTRIBUTE_UNUSED;
3327 const char *symbol_str;
3329 if (GET_CODE (op) != SYMBOL_REF)
3331 symbol_str = XSTR (op, 0);
3333 if (symbol_str[0] != '%')
3335 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3339 tls_symbolic_operand_1 (op, kind)
3341 enum tls_model kind;
3343 const char *symbol_str;
3345 if (GET_CODE (op) != SYMBOL_REF)
3347 symbol_str = XSTR (op, 0);
3349 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3353 global_dynamic_symbolic_operand (op, mode)
3355 enum machine_mode mode ATTRIBUTE_UNUSED;
3357 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3361 local_dynamic_symbolic_operand (op, mode)
3363 enum machine_mode mode ATTRIBUTE_UNUSED;
3365 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3369 initial_exec_symbolic_operand (op, mode)
3371 enum machine_mode mode ATTRIBUTE_UNUSED;
3373 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3377 local_exec_symbolic_operand (op, mode)
3379 enum machine_mode mode ATTRIBUTE_UNUSED;
3381 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3384 /* Test for a valid operand for a call instruction. Don't allow the
3385 arg pointer register or virtual regs since they may decay into
3386 reg + const, which the patterns can't handle. */
3389 call_insn_operand (op, mode)
3391 enum machine_mode mode ATTRIBUTE_UNUSED;
3393 /* Disallow indirect through a virtual register. This leads to
3394 compiler aborts when trying to eliminate them. */
3395 if (GET_CODE (op) == REG
3396 && (op == arg_pointer_rtx
3397 || op == frame_pointer_rtx
3398 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3399 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3402 /* Disallow `call 1234'. Due to varying assembler lameness this
3403 gets either rejected or translated to `call .+1234'. */
3404 if (GET_CODE (op) == CONST_INT)
3407 /* Explicitly allow SYMBOL_REF even if pic. */
3408 if (GET_CODE (op) == SYMBOL_REF)
3411 /* Otherwise we can allow any general_operand in the address. */
3412 return general_operand (op, Pmode);
3415 /* Test for a valid operand for a call instruction. Don't allow the
3416 arg pointer register or virtual regs since they may decay into
3417 reg + const, which the patterns can't handle. */
3420 sibcall_insn_operand (op, mode)
3422 enum machine_mode mode ATTRIBUTE_UNUSED;
3424 /* Disallow indirect through a virtual register. This leads to
3425 compiler aborts when trying to eliminate them. */
3426 if (GET_CODE (op) == REG
3427 && (op == arg_pointer_rtx
3428 || op == frame_pointer_rtx
3429 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3430 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3433 /* Explicitly allow SYMBOL_REF even if pic. */
3434 if (GET_CODE (op) == SYMBOL_REF)
3437 /* Otherwise we can only allow register operands. */
3438 return register_operand (op, Pmode);
3442 constant_call_address_operand (op, mode)
3444 enum machine_mode mode ATTRIBUTE_UNUSED;
3446 if (GET_CODE (op) == CONST
3447 && GET_CODE (XEXP (op, 0)) == PLUS
3448 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3449 op = XEXP (XEXP (op, 0), 0);
3450 return GET_CODE (op) == SYMBOL_REF;
3453 /* Match exactly zero and one. */
3456 const0_operand (op, mode)
3458 enum machine_mode mode;
3460 return op == CONST0_RTX (mode);
3464 const1_operand (op, mode)
3466 enum machine_mode mode ATTRIBUTE_UNUSED;
3468 return op == const1_rtx;
3471 /* Match 2, 4, or 8. Used for leal multiplicands. */
3474 const248_operand (op, mode)
3476 enum machine_mode mode ATTRIBUTE_UNUSED;
3478 return (GET_CODE (op) == CONST_INT
3479 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3482 /* True if this is a constant appropriate for an increment or decremenmt. */
3485 incdec_operand (op, mode)
3487 enum machine_mode mode ATTRIBUTE_UNUSED;
3489 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3490 registers, since carry flag is not set. */
3491 if (TARGET_PENTIUM4 && !optimize_size)
3493 return op == const1_rtx || op == constm1_rtx;
3496 /* Return nonzero if OP is acceptable as operand of DImode shift
3500 shiftdi_operand (op, mode)
3502 enum machine_mode mode ATTRIBUTE_UNUSED;
3505 return nonimmediate_operand (op, mode);
3507 return register_operand (op, mode);
3510 /* Return false if this is the stack pointer, or any other fake
3511 register eliminable to the stack pointer. Otherwise, this is
3514 This is used to prevent esp from being used as an index reg.
3515 Which would only happen in pathological cases. */
3518 reg_no_sp_operand (op, mode)
3520 enum machine_mode mode;
3523 if (GET_CODE (t) == SUBREG)
3525 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3528 return register_operand (op, mode);
3532 mmx_reg_operand (op, mode)
3534 enum machine_mode mode ATTRIBUTE_UNUSED;
3536 return MMX_REG_P (op);
3539 /* Return false if this is any eliminable register. Otherwise
3543 general_no_elim_operand (op, mode)
3545 enum machine_mode mode;
3548 if (GET_CODE (t) == SUBREG)
3550 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3551 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3552 || t == virtual_stack_dynamic_rtx)
3555 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3556 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3559 return general_operand (op, mode);
3562 /* Return false if this is any eliminable register. Otherwise
3563 register_operand or const_int. */
3566 nonmemory_no_elim_operand (op, mode)
3568 enum machine_mode mode;
3571 if (GET_CODE (t) == SUBREG)
3573 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3574 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3575 || t == virtual_stack_dynamic_rtx)
3578 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3581 /* Return false if this is any eliminable register or stack register,
3582 otherwise work like register_operand. */
3585 index_register_operand (op, mode)
3587 enum machine_mode mode;
3590 if (GET_CODE (t) == SUBREG)
3594 if (t == arg_pointer_rtx
3595 || t == frame_pointer_rtx
3596 || t == virtual_incoming_args_rtx
3597 || t == virtual_stack_vars_rtx
3598 || t == virtual_stack_dynamic_rtx
3599 || REGNO (t) == STACK_POINTER_REGNUM)
3602 return general_operand (op, mode);
3605 /* Return true if op is a Q_REGS class register. */
3608 q_regs_operand (op, mode)
3610 enum machine_mode mode;
3612 if (mode != VOIDmode && GET_MODE (op) != mode)
3614 if (GET_CODE (op) == SUBREG)
3615 op = SUBREG_REG (op);
3616 return ANY_QI_REG_P (op);
3619 /* Return true if op is an flags register. */
3622 flags_reg_operand (op, mode)
3624 enum machine_mode mode;
3626 if (mode != VOIDmode && GET_MODE (op) != mode)
3628 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3631 /* Return true if op is a NON_Q_REGS class register. */
3634 non_q_regs_operand (op, mode)
3636 enum machine_mode mode;
3638 if (mode != VOIDmode && GET_MODE (op) != mode)
3640 if (GET_CODE (op) == SUBREG)
3641 op = SUBREG_REG (op);
3642 return NON_QI_REG_P (op);
3646 zero_extended_scalar_load_operand (op, mode)
3648 enum machine_mode mode ATTRIBUTE_UNUSED;
3651 if (GET_CODE (op) != MEM)
3653 op = maybe_get_pool_constant (op);
3656 if (GET_CODE (op) != CONST_VECTOR)
3659 (GET_MODE_SIZE (GET_MODE (op)) /
3660 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3661 for (n_elts--; n_elts > 0; n_elts--)
3663 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3664 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3670 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3673 sse_comparison_operator (op, mode)
3675 enum machine_mode mode ATTRIBUTE_UNUSED;
3677 enum rtx_code code = GET_CODE (op);
3680 /* Operations supported directly. */
3690 /* These are equivalent to ones above in non-IEEE comparisons. */
3697 return !TARGET_IEEE_FP;
3702 /* Return 1 if OP is a valid comparison operator in valid mode. */
3704 ix86_comparison_operator (op, mode)
3706 enum machine_mode mode;
3708 enum machine_mode inmode;
3709 enum rtx_code code = GET_CODE (op);
3710 if (mode != VOIDmode && GET_MODE (op) != mode)
3712 if (GET_RTX_CLASS (code) != '<')
3714 inmode = GET_MODE (XEXP (op, 0));
3716 if (inmode == CCFPmode || inmode == CCFPUmode)
3718 enum rtx_code second_code, bypass_code;
3719 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3720 return (bypass_code == NIL && second_code == NIL);
3727 if (inmode == CCmode || inmode == CCGCmode
3728 || inmode == CCGOCmode || inmode == CCNOmode)
3731 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3732 if (inmode == CCmode)
3736 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3744 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3747 fcmov_comparison_operator (op, mode)
3749 enum machine_mode mode;
3751 enum machine_mode inmode;
3752 enum rtx_code code = GET_CODE (op);
3753 if (mode != VOIDmode && GET_MODE (op) != mode)
3755 if (GET_RTX_CLASS (code) != '<')
3757 inmode = GET_MODE (XEXP (op, 0));
3758 if (inmode == CCFPmode || inmode == CCFPUmode)
3760 enum rtx_code second_code, bypass_code;
3761 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3762 if (bypass_code != NIL || second_code != NIL)
3764 code = ix86_fp_compare_code_to_integer (code);
3766 /* i387 supports just limited amount of conditional codes. */
3769 case LTU: case GTU: case LEU: case GEU:
3770 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3773 case ORDERED: case UNORDERED:
3781 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3784 promotable_binary_operator (op, mode)
3786 enum machine_mode mode ATTRIBUTE_UNUSED;
3788 switch (GET_CODE (op))
3791 /* Modern CPUs have same latency for HImode and SImode multiply,
3792 but 386 and 486 do HImode multiply faster. */
3793 return ix86_cpu > PROCESSOR_I486;
3805 /* Nearly general operand, but accept any const_double, since we wish
3806 to be able to drop them into memory rather than have them get pulled
3810 cmp_fp_expander_operand (op, mode)
3812 enum machine_mode mode;
3814 if (mode != VOIDmode && mode != GET_MODE (op))
3816 if (GET_CODE (op) == CONST_DOUBLE)
3818 return general_operand (op, mode);
3821 /* Match an SI or HImode register for a zero_extract. */
3824 ext_register_operand (op, mode)
3826 enum machine_mode mode ATTRIBUTE_UNUSED;
3829 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3830 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3833 if (!register_operand (op, VOIDmode))
3836 /* Be curefull to accept only registers having upper parts. */
3837 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3838 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3841 /* Return 1 if this is a valid binary floating-point operation.
3842 OP is the expression matched, and MODE is its mode. */
3845 binary_fp_operator (op, mode)
3847 enum machine_mode mode;
3849 if (mode != VOIDmode && mode != GET_MODE (op))
3852 switch (GET_CODE (op))
3858 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3866 mult_operator (op, mode)
3868 enum machine_mode mode ATTRIBUTE_UNUSED;
3870 return GET_CODE (op) == MULT;
3874 div_operator (op, mode)
3876 enum machine_mode mode ATTRIBUTE_UNUSED;
3878 return GET_CODE (op) == DIV;
3882 arith_or_logical_operator (op, mode)
3884 enum machine_mode mode;
3886 return ((mode == VOIDmode || GET_MODE (op) == mode)
3887 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3888 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3891 /* Returns 1 if OP is memory operand with a displacement. */
3894 memory_displacement_operand (op, mode)
3896 enum machine_mode mode;
3898 struct ix86_address parts;
3900 if (! memory_operand (op, mode))
3903 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3906 return parts.disp != NULL_RTX;
3909 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3910 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3912 ??? It seems likely that this will only work because cmpsi is an
3913 expander, and no actual insns use this. */
3916 cmpsi_operand (op, mode)
3918 enum machine_mode mode;
3920 if (nonimmediate_operand (op, mode))
3923 if (GET_CODE (op) == AND
3924 && GET_MODE (op) == SImode
3925 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3926 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3927 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3928 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3929 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3930 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3936 /* Returns 1 if OP is memory operand that can not be represented by the
3940 long_memory_operand (op, mode)
3942 enum machine_mode mode;
3944 if (! memory_operand (op, mode))
3947 return memory_address_length (op) != 0;
3950 /* Return nonzero if the rtx is known aligned. */
3953 aligned_operand (op, mode)
3955 enum machine_mode mode;
3957 struct ix86_address parts;
3959 if (!general_operand (op, mode))
3962 /* Registers and immediate operands are always "aligned". */
3963 if (GET_CODE (op) != MEM)
3966 /* Don't even try to do any aligned optimizations with volatiles. */
3967 if (MEM_VOLATILE_P (op))
3972 /* Pushes and pops are only valid on the stack pointer. */
3973 if (GET_CODE (op) == PRE_DEC
3974 || GET_CODE (op) == POST_INC)
3977 /* Decode the address. */
3978 if (! ix86_decompose_address (op, &parts))
3981 if (parts.base && GET_CODE (parts.base) == SUBREG)
3982 parts.base = SUBREG_REG (parts.base);
3983 if (parts.index && GET_CODE (parts.index) == SUBREG)
3984 parts.index = SUBREG_REG (parts.index);
3986 /* Look for some component that isn't known to be aligned. */
3990 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3995 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4000 if (GET_CODE (parts.disp) != CONST_INT
4001 || (INTVAL (parts.disp) & 3) != 0)
4005 /* Didn't find one -- this must be an aligned address. */
4009 /* Return true if the constant is something that can be loaded with
4010 a special instruction. Only handle 0.0 and 1.0; others are less
4014 standard_80387_constant_p (x)
4017 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4019 /* Note that on the 80387, other constants, such as pi, that we should support
4020 too. On some machines, these are much slower to load as standard constant,
4021 than to load from doubles in memory. */
4022 if (x == CONST0_RTX (GET_MODE (x)))
4024 if (x == CONST1_RTX (GET_MODE (x)))
4029 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4032 standard_sse_constant_p (x)
4035 if (x == const0_rtx)
4037 return (x == CONST0_RTX (GET_MODE (x)));
4040 /* Returns 1 if OP contains a symbol reference */
4043 symbolic_reference_mentioned_p (op)
4046 register const char *fmt;
4049 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4052 fmt = GET_RTX_FORMAT (GET_CODE (op));
4053 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4059 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4060 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4064 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4071 /* Return 1 if it is appropriate to emit `ret' instructions in the
4072 body of a function. Do this only if the epilogue is simple, needing a
4073 couple of insns. Prior to reloading, we can't tell how many registers
4074 must be saved, so return 0 then. Return 0 if there is no frame
4075 marker to de-allocate.
4077 If NON_SAVING_SETJMP is defined and true, then it is not possible
4078 for the epilogue to be simple, so return 0. This is a special case
4079 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4080 until final, but jump_optimize may need to know sooner if a
4084 ix86_can_use_return_insn_p ()
4086 struct ix86_frame frame;
4088 #ifdef NON_SAVING_SETJMP
4089 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4093 if (! reload_completed || frame_pointer_needed)
4096 /* Don't allow more than 32 pop, since that's all we can do
4097 with one instruction. */
4098 if (current_function_pops_args
4099 && current_function_args_size >= 32768)
4102 ix86_compute_frame_layout (&frame);
4103 return frame.to_allocate == 0 && frame.nregs == 0;
4106 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4108 x86_64_sign_extended_value (value)
4111 switch (GET_CODE (value))
4113 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4114 to be at least 32 and this all acceptable constants are
4115 represented as CONST_INT. */
4117 if (HOST_BITS_PER_WIDE_INT == 32)
4121 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4122 return trunc_int_for_mode (val, SImode) == val;
4126 /* For certain code models, the symbolic references are known to fit.
4127 in CM_SMALL_PIC model we know it fits if it is local to the shared
4128 library. Don't count TLS SYMBOL_REFs here, since they should fit
4129 only if inside of UNSPEC handled below. */
4131 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4133 /* For certain code models, the code is near as well. */
4135 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4136 || ix86_cmodel == CM_KERNEL);
4138 /* We also may accept the offsetted memory references in certain special
4141 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4142 switch (XINT (XEXP (value, 0), 1))
4144 case UNSPEC_GOTPCREL:
4146 case UNSPEC_GOTNTPOFF:
4152 if (GET_CODE (XEXP (value, 0)) == PLUS)
4154 rtx op1 = XEXP (XEXP (value, 0), 0);
4155 rtx op2 = XEXP (XEXP (value, 0), 1);
4156 HOST_WIDE_INT offset;
4158 if (ix86_cmodel == CM_LARGE)
4160 if (GET_CODE (op2) != CONST_INT)
4162 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4163 switch (GET_CODE (op1))
4166 /* For CM_SMALL assume that latest object is 16MB before
4167 end of 31bits boundary. We may also accept pretty
4168 large negative constants knowing that all objects are
4169 in the positive half of address space. */
4170 if (ix86_cmodel == CM_SMALL
4171 && offset < 16*1024*1024
4172 && trunc_int_for_mode (offset, SImode) == offset)
4174 /* For CM_KERNEL we know that all object resist in the
4175 negative half of 32bits address space. We may not
4176 accept negative offsets, since they may be just off
4177 and we may accept pretty large positive ones. */
4178 if (ix86_cmodel == CM_KERNEL
4180 && trunc_int_for_mode (offset, SImode) == offset)
4184 /* These conditions are similar to SYMBOL_REF ones, just the
4185 constraints for code models differ. */
4186 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4187 && offset < 16*1024*1024
4188 && trunc_int_for_mode (offset, SImode) == offset)
4190 if (ix86_cmodel == CM_KERNEL
4192 && trunc_int_for_mode (offset, SImode) == offset)
4196 switch (XINT (op1, 1))
4201 && trunc_int_for_mode (offset, SImode) == offset)
4215 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4217 x86_64_zero_extended_value (value)
4220 switch (GET_CODE (value))
4223 if (HOST_BITS_PER_WIDE_INT == 32)
4224 return (GET_MODE (value) == VOIDmode
4225 && !CONST_DOUBLE_HIGH (value));
4229 if (HOST_BITS_PER_WIDE_INT == 32)
4230 return INTVAL (value) >= 0;
4232 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4235 /* For certain code models, the symbolic references are known to fit. */
4237 return ix86_cmodel == CM_SMALL;
4239 /* For certain code models, the code is near as well. */
4241 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4243 /* We also may accept the offsetted memory references in certain special
4246 if (GET_CODE (XEXP (value, 0)) == PLUS)
4248 rtx op1 = XEXP (XEXP (value, 0), 0);
4249 rtx op2 = XEXP (XEXP (value, 0), 1);
4251 if (ix86_cmodel == CM_LARGE)
4253 switch (GET_CODE (op1))
4257 /* For small code model we may accept pretty large positive
4258 offsets, since one bit is available for free. Negative
4259 offsets are limited by the size of NULL pointer area
4260 specified by the ABI. */
4261 if (ix86_cmodel == CM_SMALL
4262 && GET_CODE (op2) == CONST_INT
4263 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4264 && (trunc_int_for_mode (INTVAL (op2), SImode)
4267 /* ??? For the kernel, we may accept adjustment of
4268 -0x10000000, since we know that it will just convert
4269 negative address space to positive, but perhaps this
4270 is not worthwhile. */
4273 /* These conditions are similar to SYMBOL_REF ones, just the
4274 constraints for code models differ. */
4275 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4276 && GET_CODE (op2) == CONST_INT
4277 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4278 && (trunc_int_for_mode (INTVAL (op2), SImode)
4292 /* Value should be nonzero if functions must have frame pointers.
4293 Zero means the frame pointer need not be set up (and parms may
4294 be accessed via the stack pointer) in functions that seem suitable. */
4297 ix86_frame_pointer_required ()
4299 /* If we accessed previous frames, then the generated code expects
4300 to be able to access the saved ebp value in our frame. */
4301 if (cfun->machine->accesses_prev_frame)
4304 /* Several x86 os'es need a frame pointer for other reasons,
4305 usually pertaining to setjmp. */
4306 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4309 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4310 the frame pointer by default. Turn it back on now if we've not
4311 got a leaf function. */
4312 if (TARGET_OMIT_LEAF_FRAME_POINTER
4313 && (!current_function_is_leaf))
4316 if (current_function_profile)
4322 /* Record that the current function accesses previous call frames. */
4325 ix86_setup_frame_addresses ()
4327 cfun->machine->accesses_prev_frame = 1;
4330 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4331 # define USE_HIDDEN_LINKONCE 1
4333 # define USE_HIDDEN_LINKONCE 0
4336 static int pic_labels_used;
4338 /* Fills in the label name that should be used for a pc thunk for
4339 the given register. */
4342 get_pc_thunk_name (name, regno)
4346 if (USE_HIDDEN_LINKONCE)
4347 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4349 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4353 /* This function generates code for -fpic that loads %ebx with
4354 the return address of the caller and then returns. */
4357 ix86_asm_file_end (file)
4363 for (regno = 0; regno < 8; ++regno)
4367 if (! ((pic_labels_used >> regno) & 1))
4370 get_pc_thunk_name (name, regno);
4372 if (USE_HIDDEN_LINKONCE)
4376 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4378 TREE_PUBLIC (decl) = 1;
4379 TREE_STATIC (decl) = 1;
4380 DECL_ONE_ONLY (decl) = 1;
4382 (*targetm.asm_out.unique_section) (decl, 0);
4383 named_section (decl, NULL, 0);
4385 (*targetm.asm_out.globalize_label) (file, name);
4386 fputs ("\t.hidden\t", file);
4387 assemble_name (file, name);
4389 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4394 ASM_OUTPUT_LABEL (file, name);
4397 xops[0] = gen_rtx_REG (SImode, regno);
4398 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4399 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4400 output_asm_insn ("ret", xops);
4404 /* Emit code for the SET_GOT patterns. */
4407 output_set_got (dest)
4413 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4415 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4417 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4420 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4422 output_asm_insn ("call\t%a2", xops);
4425 /* Output the "canonical" label name ("Lxx$pb") here too. This
4426 is what will be referred to by the Mach-O PIC subsystem. */
4427 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4429 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4430 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4433 output_asm_insn ("pop{l}\t%0", xops);
4438 get_pc_thunk_name (name, REGNO (dest));
4439 pic_labels_used |= 1 << REGNO (dest);
4441 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4442 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4443 output_asm_insn ("call\t%X2", xops);
4446 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4447 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4448 else if (!TARGET_MACHO)
4449 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4454 /* Generate an "push" pattern for input ARG. */
4460 return gen_rtx_SET (VOIDmode,
4462 gen_rtx_PRE_DEC (Pmode,
4463 stack_pointer_rtx)),
4467 /* Return >= 0 if there is an unused call-clobbered register available
4468 for the entire function. */
4471 ix86_select_alt_pic_regnum ()
4473 if (current_function_is_leaf && !current_function_profile)
4476 for (i = 2; i >= 0; --i)
4477 if (!regs_ever_live[i])
4481 return INVALID_REGNUM;
4484 /* Return 1 if we need to save REGNO. */
4486 ix86_save_reg (regno, maybe_eh_return)
4488 int maybe_eh_return;
4490 if (pic_offset_table_rtx
4491 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4492 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4493 || current_function_profile
4494 || current_function_calls_eh_return))
4496 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4501 if (current_function_calls_eh_return && maybe_eh_return)
4506 unsigned test = EH_RETURN_DATA_REGNO (i);
4507 if (test == INVALID_REGNUM)
4514 return (regs_ever_live[regno]
4515 && !call_used_regs[regno]
4516 && !fixed_regs[regno]
4517 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4520 /* Return number of registers to be saved on the stack. */
4528 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4529 if (ix86_save_reg (regno, true))
4534 /* Return the offset between two registers, one to be eliminated, and the other
4535 its replacement, at the start of a routine. */
4538 ix86_initial_elimination_offset (from, to)
4542 struct ix86_frame frame;
4543 ix86_compute_frame_layout (&frame);
4545 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4546 return frame.hard_frame_pointer_offset;
4547 else if (from == FRAME_POINTER_REGNUM
4548 && to == HARD_FRAME_POINTER_REGNUM)
4549 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4552 if (to != STACK_POINTER_REGNUM)
4554 else if (from == ARG_POINTER_REGNUM)
4555 return frame.stack_pointer_offset;
4556 else if (from != FRAME_POINTER_REGNUM)
4559 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4563 /* Fill structure ix86_frame about frame of currently computed function. */
4566 ix86_compute_frame_layout (frame)
4567 struct ix86_frame *frame;
4569 HOST_WIDE_INT total_size;
4570 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4572 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4573 HOST_WIDE_INT size = get_frame_size ();
4575 frame->nregs = ix86_nsaved_regs ();
4578 /* Skip return address and saved base pointer. */
4579 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4581 frame->hard_frame_pointer_offset = offset;
4583 /* Do some sanity checking of stack_alignment_needed and
4584 preferred_alignment, since i386 port is the only using those features
4585 that may break easily. */
4587 if (size && !stack_alignment_needed)
4589 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4591 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4593 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4596 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4597 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4599 /* Register save area */
4600 offset += frame->nregs * UNITS_PER_WORD;
4603 if (ix86_save_varrargs_registers)
4605 offset += X86_64_VARARGS_SIZE;
4606 frame->va_arg_size = X86_64_VARARGS_SIZE;
4609 frame->va_arg_size = 0;
4611 /* Align start of frame for local function. */
4612 frame->padding1 = ((offset + stack_alignment_needed - 1)
4613 & -stack_alignment_needed) - offset;
4615 offset += frame->padding1;
4617 /* Frame pointer points here. */
4618 frame->frame_pointer_offset = offset;
4622 /* Add outgoing arguments area. Can be skipped if we eliminated
4623 all the function calls as dead code. */
4624 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4626 offset += current_function_outgoing_args_size;
4627 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4630 frame->outgoing_arguments_size = 0;
4632 /* Align stack boundary. Only needed if we're calling another function
4634 if (!current_function_is_leaf || current_function_calls_alloca)
4635 frame->padding2 = ((offset + preferred_alignment - 1)
4636 & -preferred_alignment) - offset;
4638 frame->padding2 = 0;
4640 offset += frame->padding2;
4642 /* We've reached end of stack frame. */
4643 frame->stack_pointer_offset = offset;
4645 /* Size prologue needs to allocate. */
4646 frame->to_allocate =
4647 (size + frame->padding1 + frame->padding2
4648 + frame->outgoing_arguments_size + frame->va_arg_size);
4650 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4651 && current_function_is_leaf)
4653 frame->red_zone_size = frame->to_allocate;
4654 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4655 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4658 frame->red_zone_size = 0;
4659 frame->to_allocate -= frame->red_zone_size;
4660 frame->stack_pointer_offset -= frame->red_zone_size;
4662 fprintf (stderr, "nregs: %i\n", frame->nregs);
4663 fprintf (stderr, "size: %i\n", size);
4664 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4665 fprintf (stderr, "padding1: %i\n", frame->padding1);
4666 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4667 fprintf (stderr, "padding2: %i\n", frame->padding2);
4668 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4669 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4670 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4671 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4672 frame->hard_frame_pointer_offset);
4673 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4677 /* Emit code to save registers in the prologue. */
4680 ix86_emit_save_regs ()
4685 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4686 if (ix86_save_reg (regno, true))
4688 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4689 RTX_FRAME_RELATED_P (insn) = 1;
4693 /* Emit code to save registers using MOV insns. First register
4694 is restored from POINTER + OFFSET. */
4696 ix86_emit_save_regs_using_mov (pointer, offset)
4698 HOST_WIDE_INT offset;
4703 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4704 if (ix86_save_reg (regno, true))
4706 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4708 gen_rtx_REG (Pmode, regno));
4709 RTX_FRAME_RELATED_P (insn) = 1;
4710 offset += UNITS_PER_WORD;
4714 /* Expand the prologue into a bunch of separate insns. */
4717 ix86_expand_prologue ()
4721 struct ix86_frame frame;
4723 HOST_WIDE_INT allocate;
4725 ix86_compute_frame_layout (&frame);
4728 int count = frame.nregs;
4730 /* The fast prologue uses move instead of push to save registers. This
4731 is significantly longer, but also executes faster as modern hardware
4732 can execute the moves in parallel, but can't do that for push/pop.
4734 Be curefull about choosing what prologue to emit: When function takes
4735 many instructions to execute we may use slow version as well as in
4736 case function is known to be outside hot spot (this is known with
4737 feedback only). Weight the size of function by number of registers
4738 to save as it is cheap to use one or two push instructions but very
4739 slow to use many of them. */
4741 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4742 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4743 || (flag_branch_probabilities
4744 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4745 use_fast_prologue_epilogue = 0;
4747 use_fast_prologue_epilogue = !expensive_function_p (count);
4748 if (TARGET_PROLOGUE_USING_MOVE)
4749 use_mov = use_fast_prologue_epilogue;
4752 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4753 slower on all targets. Also sdb doesn't like it. */
4755 if (frame_pointer_needed)
4757 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4758 RTX_FRAME_RELATED_P (insn) = 1;
4760 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4761 RTX_FRAME_RELATED_P (insn) = 1;
4764 allocate = frame.to_allocate;
4765 /* In case we are dealing only with single register and empty frame,
4766 push is equivalent of the mov+add sequence. */
4767 if (allocate == 0 && frame.nregs <= 1)
4771 ix86_emit_save_regs ();
4773 allocate += frame.nregs * UNITS_PER_WORD;
4777 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4779 insn = emit_insn (gen_pro_epilogue_adjust_stack
4780 (stack_pointer_rtx, stack_pointer_rtx,
4781 GEN_INT (-allocate)));
4782 RTX_FRAME_RELATED_P (insn) = 1;
4786 /* ??? Is this only valid for Win32? */
4793 arg0 = gen_rtx_REG (SImode, 0);
4794 emit_move_insn (arg0, GEN_INT (allocate));
4796 sym = gen_rtx_MEM (FUNCTION_MODE,
4797 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4798 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4800 CALL_INSN_FUNCTION_USAGE (insn)
4801 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4802 CALL_INSN_FUNCTION_USAGE (insn));
4806 if (!frame_pointer_needed || !frame.to_allocate)
4807 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4809 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4810 -frame.nregs * UNITS_PER_WORD);
4813 #ifdef SUBTARGET_PROLOGUE
4817 pic_reg_used = false;
4818 if (pic_offset_table_rtx
4819 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4820 || current_function_profile))
4822 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4824 if (alt_pic_reg_used != INVALID_REGNUM)
4825 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4827 pic_reg_used = true;
4832 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4834 /* Even with accurate pre-reload life analysis, we can wind up
4835 deleting all references to the pic register after reload.
4836 Consider if cross-jumping unifies two sides of a branch
4837 controled by a comparison vs the only read from a global.
4838 In which case, allow the set_got to be deleted, though we're
4839 too late to do anything about the ebx save in the prologue. */
4840 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4843 /* Prevent function calls from be scheduled before the call to mcount.
4844 In the pic_reg_used case, make sure that the got load isn't deleted. */
4845 if (current_function_profile)
4846 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4849 /* Emit code to restore saved registers using MOV insns. First register
4850 is restored from POINTER + OFFSET. */
4852 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4855 int maybe_eh_return;
4859 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4860 if (ix86_save_reg (regno, maybe_eh_return))
4862 emit_move_insn (gen_rtx_REG (Pmode, regno),
4863 adjust_address (gen_rtx_MEM (Pmode, pointer),
4865 offset += UNITS_PER_WORD;
4869 /* Restore function stack, frame, and registers. */
4872 ix86_expand_epilogue (style)
4876 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4877 struct ix86_frame frame;
4878 HOST_WIDE_INT offset;
4880 ix86_compute_frame_layout (&frame);
4882 /* Calculate start of saved registers relative to ebp. Special care
4883 must be taken for the normal return case of a function using
4884 eh_return: the eax and edx registers are marked as saved, but not
4885 restored along this path. */
4886 offset = frame.nregs;
4887 if (current_function_calls_eh_return && style != 2)
4889 offset *= -UNITS_PER_WORD;
4891 /* If we're only restoring one register and sp is not valid then
4892 using a move instruction to restore the register since it's
4893 less work than reloading sp and popping the register.
4895 The default code result in stack adjustment using add/lea instruction,
4896 while this code results in LEAVE instruction (or discrete equivalent),
4897 so it is profitable in some other cases as well. Especially when there
4898 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4899 and there is exactly one register to pop. This heruistic may need some
4900 tuning in future. */
4901 if ((!sp_valid && frame.nregs <= 1)
4902 || (TARGET_EPILOGUE_USING_MOVE
4903 && use_fast_prologue_epilogue
4904 && (frame.nregs > 1 || frame.to_allocate))
4905 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4906 || (frame_pointer_needed && TARGET_USE_LEAVE
4907 && use_fast_prologue_epilogue && frame.nregs == 1)
4908 || current_function_calls_eh_return)
4910 /* Restore registers. We can use ebp or esp to address the memory
4911 locations. If both are available, default to ebp, since offsets
4912 are known to be small. Only exception is esp pointing directly to the
4913 end of block of saved registers, where we may simplify addressing
4916 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4917 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4918 frame.to_allocate, style == 2);
4920 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4921 offset, style == 2);
4923 /* eh_return epilogues need %ecx added to the stack pointer. */
4926 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4928 if (frame_pointer_needed)
4930 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4931 tmp = plus_constant (tmp, UNITS_PER_WORD);
4932 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4934 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4935 emit_move_insn (hard_frame_pointer_rtx, tmp);
4937 emit_insn (gen_pro_epilogue_adjust_stack
4938 (stack_pointer_rtx, sa, const0_rtx));
4942 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4943 tmp = plus_constant (tmp, (frame.to_allocate
4944 + frame.nregs * UNITS_PER_WORD));
4945 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4948 else if (!frame_pointer_needed)
4949 emit_insn (gen_pro_epilogue_adjust_stack
4950 (stack_pointer_rtx, stack_pointer_rtx,
4951 GEN_INT (frame.to_allocate
4952 + frame.nregs * UNITS_PER_WORD)));
4953 /* If not an i386, mov & pop is faster than "leave". */
4954 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4955 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4958 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4959 hard_frame_pointer_rtx,
4962 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4964 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4969 /* First step is to deallocate the stack frame so that we can
4970 pop the registers. */
4973 if (!frame_pointer_needed)
4975 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4976 hard_frame_pointer_rtx,
4979 else if (frame.to_allocate)
4980 emit_insn (gen_pro_epilogue_adjust_stack
4981 (stack_pointer_rtx, stack_pointer_rtx,
4982 GEN_INT (frame.to_allocate)));
4984 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4985 if (ix86_save_reg (regno, false))
4988 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4990 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4992 if (frame_pointer_needed)
4994 /* Leave results in shorter dependency chains on CPUs that are
4995 able to grok it fast. */
4996 if (TARGET_USE_LEAVE)
4997 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4998 else if (TARGET_64BIT)
4999 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5001 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5005 /* Sibcall epilogues don't want a return instruction. */
5009 if (current_function_pops_args && current_function_args_size)
5011 rtx popc = GEN_INT (current_function_pops_args);
5013 /* i386 can only pop 64K bytes. If asked to pop more, pop
5014 return address, do explicit add, and jump indirectly to the
5017 if (current_function_pops_args >= 65536)
5019 rtx ecx = gen_rtx_REG (SImode, 2);
5021 /* There are is no "pascal" calling convention in 64bit ABI. */
5025 emit_insn (gen_popsi1 (ecx));
5026 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5027 emit_jump_insn (gen_return_indirect_internal (ecx));
5030 emit_jump_insn (gen_return_pop_internal (popc));
5033 emit_jump_insn (gen_return_internal ());
5036 /* Reset from the function's potential modifications. */
5039 ix86_output_function_epilogue (file, size)
5040 FILE *file ATTRIBUTE_UNUSED;
5041 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5043 if (pic_offset_table_rtx)
5044 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5047 /* Extract the parts of an RTL expression that is a valid memory address
5048 for an instruction. Return 0 if the structure of the address is
5049 grossly off. Return -1 if the address contains ASHIFT, so it is not
5050 strictly valid, but still used for computing length of lea instruction.
5054 ix86_decompose_address (addr, out)
5056 struct ix86_address *out;
5058 rtx base = NULL_RTX;
5059 rtx index = NULL_RTX;
5060 rtx disp = NULL_RTX;
5061 HOST_WIDE_INT scale = 1;
5062 rtx scale_rtx = NULL_RTX;
5065 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5067 else if (GET_CODE (addr) == PLUS)
5069 rtx op0 = XEXP (addr, 0);
5070 rtx op1 = XEXP (addr, 1);
5071 enum rtx_code code0 = GET_CODE (op0);
5072 enum rtx_code code1 = GET_CODE (op1);
5074 if (code0 == REG || code0 == SUBREG)
5076 if (code1 == REG || code1 == SUBREG)
5077 index = op0, base = op1; /* index + base */
5079 base = op0, disp = op1; /* base + displacement */
5081 else if (code0 == MULT)
5083 index = XEXP (op0, 0);
5084 scale_rtx = XEXP (op0, 1);
5085 if (code1 == REG || code1 == SUBREG)
5086 base = op1; /* index*scale + base */
5088 disp = op1; /* index*scale + disp */
5090 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5092 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5093 scale_rtx = XEXP (XEXP (op0, 0), 1);
5094 base = XEXP (op0, 1);
5097 else if (code0 == PLUS)
5099 index = XEXP (op0, 0); /* index + base + disp */
5100 base = XEXP (op0, 1);
5106 else if (GET_CODE (addr) == MULT)
5108 index = XEXP (addr, 0); /* index*scale */
5109 scale_rtx = XEXP (addr, 1);
5111 else if (GET_CODE (addr) == ASHIFT)
5115 /* We're called for lea too, which implements ashift on occasion. */
5116 index = XEXP (addr, 0);
5117 tmp = XEXP (addr, 1);
5118 if (GET_CODE (tmp) != CONST_INT)
5120 scale = INTVAL (tmp);
5121 if ((unsigned HOST_WIDE_INT) scale > 3)
5127 disp = addr; /* displacement */
5129 /* Extract the integral value of scale. */
5132 if (GET_CODE (scale_rtx) != CONST_INT)
5134 scale = INTVAL (scale_rtx);
5137 /* Allow arg pointer and stack pointer as index if there is not scaling */
5138 if (base && index && scale == 1
5139 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5140 || index == stack_pointer_rtx))
5147 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5148 if ((base == hard_frame_pointer_rtx
5149 || base == frame_pointer_rtx
5150 || base == arg_pointer_rtx) && !disp)
5153 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5154 Avoid this by transforming to [%esi+0]. */
5155 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5156 && base && !index && !disp
5158 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5161 /* Special case: encode reg+reg instead of reg*2. */
5162 if (!base && index && scale && scale == 2)
5163 base = index, scale = 1;
5165 /* Special case: scaling cannot be encoded without base or displacement. */
5166 if (!base && !disp && index && scale != 1)
5177 /* Return cost of the memory address x.
5178 For i386, it is better to use a complex address than let gcc copy
5179 the address into a reg and make a new pseudo. But not if the address
5180 requires to two regs - that would mean more pseudos with longer
5183 ix86_address_cost (x)
5186 struct ix86_address parts;
5189 if (!ix86_decompose_address (x, &parts))
5192 if (parts.base && GET_CODE (parts.base) == SUBREG)
5193 parts.base = SUBREG_REG (parts.base);
5194 if (parts.index && GET_CODE (parts.index) == SUBREG)
5195 parts.index = SUBREG_REG (parts.index);
5197 /* More complex memory references are better. */
5198 if (parts.disp && parts.disp != const0_rtx)
5201 /* Attempt to minimize number of registers in the address. */
5203 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5205 && (!REG_P (parts.index)
5206 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5210 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5212 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5213 && parts.base != parts.index)
5216 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5217 since it's predecode logic can't detect the length of instructions
5218 and it degenerates to vector decoded. Increase cost of such
5219 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5220 to split such addresses or even refuse such addresses at all.
5222 Following addressing modes are affected:
5227 The first and last case may be avoidable by explicitly coding the zero in
5228 memory address, but I don't have AMD-K6 machine handy to check this
5232 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5233 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5234 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5240 /* If X is a machine specific address (i.e. a symbol or label being
5241 referenced as a displacement from the GOT implemented using an
5242 UNSPEC), then return the base term. Otherwise return X. */
5245 ix86_find_base_term (x)
5252 if (GET_CODE (x) != CONST)
5255 if (GET_CODE (term) == PLUS
5256 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5257 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5258 term = XEXP (term, 0);
5259 if (GET_CODE (term) != UNSPEC
5260 || XINT (term, 1) != UNSPEC_GOTPCREL)
5263 term = XVECEXP (term, 0, 0);
5265 if (GET_CODE (term) != SYMBOL_REF
5266 && GET_CODE (term) != LABEL_REF)
5272 if (GET_CODE (x) != PLUS
5273 || XEXP (x, 0) != pic_offset_table_rtx
5274 || GET_CODE (XEXP (x, 1)) != CONST)
5277 term = XEXP (XEXP (x, 1), 0);
5279 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5280 term = XEXP (term, 0);
5282 if (GET_CODE (term) != UNSPEC
5283 || XINT (term, 1) != UNSPEC_GOTOFF)
5286 term = XVECEXP (term, 0, 0);
5288 if (GET_CODE (term) != SYMBOL_REF
5289 && GET_CODE (term) != LABEL_REF)
5295 /* Determine if a given RTX is a valid constant. We already know this
5296 satisfies CONSTANT_P. */
5299 legitimate_constant_p (x)
5304 switch (GET_CODE (x))
5307 /* TLS symbols are not constant. */
5308 if (tls_symbolic_operand (x, Pmode))
5313 inner = XEXP (x, 0);
5315 /* Offsets of TLS symbols are never valid.
5316 Discourage CSE from creating them. */
5317 if (GET_CODE (inner) == PLUS
5318 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5321 /* Only some unspecs are valid as "constants". */
5322 if (GET_CODE (inner) == UNSPEC)
5323 switch (XINT (inner, 1))
5326 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5336 /* Otherwise we handle everything else in the move patterns. */
5340 /* Determine if it's legal to put X into the constant pool. This
5341 is not possible for the address of thread-local symbols, which
5342 is checked above. */
5345 ix86_cannot_force_const_mem (x)
5348 return !legitimate_constant_p (x);
5351 /* Determine if a given RTX is a valid constant address. */
5354 constant_address_p (x)
5357 switch (GET_CODE (x))
5364 return TARGET_64BIT;
5367 /* For Mach-O, really believe the CONST. */
5370 /* Otherwise fall through. */
5372 return !flag_pic && legitimate_constant_p (x);
5379 /* Nonzero if the constant value X is a legitimate general operand
5380 when generating PIC code. It is given that flag_pic is on and
5381 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5384 legitimate_pic_operand_p (x)
5389 switch (GET_CODE (x))
5392 inner = XEXP (x, 0);
5394 /* Only some unspecs are valid as "constants". */
5395 if (GET_CODE (inner) == UNSPEC)
5396 switch (XINT (inner, 1))
5399 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5407 return legitimate_pic_address_disp_p (x);
5414 /* Determine if a given CONST RTX is a valid memory displacement
5418 legitimate_pic_address_disp_p (disp)
5423 /* In 64bit mode we can allow direct addresses of symbols and labels
5424 when they are not dynamic symbols. */
5427 /* TLS references should always be enclosed in UNSPEC. */
5428 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5430 if (GET_CODE (disp) == SYMBOL_REF
5431 && ix86_cmodel == CM_SMALL_PIC
5432 && (CONSTANT_POOL_ADDRESS_P (disp)
5433 || SYMBOL_REF_FLAG (disp)))
5435 if (GET_CODE (disp) == LABEL_REF)
5437 if (GET_CODE (disp) == CONST
5438 && GET_CODE (XEXP (disp, 0)) == PLUS
5439 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5440 && ix86_cmodel == CM_SMALL_PIC
5441 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5442 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5443 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5444 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5445 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5446 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5449 if (GET_CODE (disp) != CONST)
5451 disp = XEXP (disp, 0);
5455 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5456 of GOT tables. We should not need these anyway. */
5457 if (GET_CODE (disp) != UNSPEC
5458 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5461 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5462 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5468 if (GET_CODE (disp) == PLUS)
5470 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5472 disp = XEXP (disp, 0);
5476 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5477 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5479 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5480 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5481 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5483 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5484 if (strstr (sym_name, "$pb") != 0)
5489 if (GET_CODE (disp) != UNSPEC)
5492 switch (XINT (disp, 1))
5497 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5499 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5500 case UNSPEC_GOTTPOFF:
5501 case UNSPEC_GOTNTPOFF:
5502 case UNSPEC_INDNTPOFF:
5505 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5507 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5509 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5515 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5516 memory address for an instruction. The MODE argument is the machine mode
5517 for the MEM expression that wants to use this address.
5519 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5520 convert common non-canonical forms to canonical form so that they will
5524 legitimate_address_p (mode, addr, strict)
5525 enum machine_mode mode;
5529 struct ix86_address parts;
5530 rtx base, index, disp;
5531 HOST_WIDE_INT scale;
5532 const char *reason = NULL;
5533 rtx reason_rtx = NULL_RTX;
5535 if (TARGET_DEBUG_ADDR)
5538 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5539 GET_MODE_NAME (mode), strict);
5543 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5545 if (TARGET_DEBUG_ADDR)
5546 fprintf (stderr, "Success.\n");
5550 if (ix86_decompose_address (addr, &parts) <= 0)
5552 reason = "decomposition failed";
5557 index = parts.index;
5559 scale = parts.scale;
5561 /* Validate base register.
5563 Don't allow SUBREG's here, it can lead to spill failures when the base
5564 is one word out of a two word structure, which is represented internally
5572 if (GET_CODE (base) == SUBREG)
5573 reg = SUBREG_REG (base);
5577 if (GET_CODE (reg) != REG)
5579 reason = "base is not a register";
5583 if (GET_MODE (base) != Pmode)
5585 reason = "base is not in Pmode";
5589 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5590 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5592 reason = "base is not valid";
5597 /* Validate index register.
5599 Don't allow SUBREG's here, it can lead to spill failures when the index
5600 is one word out of a two word structure, which is represented internally
5608 if (GET_CODE (index) == SUBREG)
5609 reg = SUBREG_REG (index);
5613 if (GET_CODE (reg) != REG)
5615 reason = "index is not a register";
5619 if (GET_MODE (index) != Pmode)
5621 reason = "index is not in Pmode";
5625 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5626 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5628 reason = "index is not valid";
5633 /* Validate scale factor. */
5636 reason_rtx = GEN_INT (scale);
5639 reason = "scale without index";
5643 if (scale != 2 && scale != 4 && scale != 8)
5645 reason = "scale is not a valid multiplier";
5650 /* Validate displacement. */
5655 if (GET_CODE (disp) == CONST
5656 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5657 switch (XINT (XEXP (disp, 0), 1))
5661 case UNSPEC_GOTPCREL:
5664 goto is_legitimate_pic;
5666 case UNSPEC_GOTTPOFF:
5667 case UNSPEC_GOTNTPOFF:
5668 case UNSPEC_INDNTPOFF:
5674 reason = "invalid address unspec";
5678 else if (flag_pic && (SYMBOLIC_CONST (disp)
5680 && !machopic_operand_p (disp)
5685 if (TARGET_64BIT && (index || base))
5687 /* foo@dtpoff(%rX) is ok. */
5688 if (GET_CODE (disp) != CONST
5689 || GET_CODE (XEXP (disp, 0)) != PLUS
5690 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5691 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5692 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5693 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5695 reason = "non-constant pic memory reference";
5699 else if (! legitimate_pic_address_disp_p (disp))
5701 reason = "displacement is an invalid pic construct";
5705 /* This code used to verify that a symbolic pic displacement
5706 includes the pic_offset_table_rtx register.
5708 While this is good idea, unfortunately these constructs may
5709 be created by "adds using lea" optimization for incorrect
5718 This code is nonsensical, but results in addressing
5719 GOT table with pic_offset_table_rtx base. We can't
5720 just refuse it easily, since it gets matched by
5721 "addsi3" pattern, that later gets split to lea in the
5722 case output register differs from input. While this
5723 can be handled by separate addsi pattern for this case
5724 that never results in lea, this seems to be easier and
5725 correct fix for crash to disable this test. */
5727 else if (!CONSTANT_ADDRESS_P (disp))
5729 reason = "displacement is not constant";
5732 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5734 reason = "displacement is out of range";
5737 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5739 reason = "displacement is a const_double";
5744 /* Everything looks valid. */
5745 if (TARGET_DEBUG_ADDR)
5746 fprintf (stderr, "Success.\n");
5750 if (TARGET_DEBUG_ADDR)
5752 fprintf (stderr, "Error: %s\n", reason);
5753 debug_rtx (reason_rtx);
5758 /* Return an unique alias set for the GOT. */
5760 static HOST_WIDE_INT
5761 ix86_GOT_alias_set ()
5763 static HOST_WIDE_INT set = -1;
5765 set = new_alias_set ();
5769 /* Return a legitimate reference for ORIG (an address) using the
5770 register REG. If REG is 0, a new pseudo is generated.
5772 There are two types of references that must be handled:
5774 1. Global data references must load the address from the GOT, via
5775 the PIC reg. An insn is emitted to do this load, and the reg is
5778 2. Static data references, constant pool addresses, and code labels
5779 compute the address as an offset from the GOT, whose base is in
5780 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5781 differentiate them from global data objects. The returned
5782 address is the PIC reg + an unspec constant.
5784 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5785 reg also appears in the address. */
5788 legitimize_pic_address (orig, reg)
5798 reg = gen_reg_rtx (Pmode);
5799 /* Use the generic Mach-O PIC machinery. */
5800 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5803 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5805 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5807 /* This symbol may be referenced via a displacement from the PIC
5808 base address (@GOTOFF). */
5810 if (reload_in_progress)
5811 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5812 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5813 new = gen_rtx_CONST (Pmode, new);
5814 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5818 emit_move_insn (reg, new);
5822 else if (GET_CODE (addr) == SYMBOL_REF)
5826 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5827 new = gen_rtx_CONST (Pmode, new);
5828 new = gen_rtx_MEM (Pmode, new);
5829 RTX_UNCHANGING_P (new) = 1;
5830 set_mem_alias_set (new, ix86_GOT_alias_set ());
5833 reg = gen_reg_rtx (Pmode);
5834 /* Use directly gen_movsi, otherwise the address is loaded
5835 into register for CSE. We don't want to CSE this addresses,
5836 instead we CSE addresses from the GOT table, so skip this. */
5837 emit_insn (gen_movsi (reg, new));
5842 /* This symbol must be referenced via a load from the
5843 Global Offset Table (@GOT). */
5845 if (reload_in_progress)
5846 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5847 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5848 new = gen_rtx_CONST (Pmode, new);
5849 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5850 new = gen_rtx_MEM (Pmode, new);
5851 RTX_UNCHANGING_P (new) = 1;
5852 set_mem_alias_set (new, ix86_GOT_alias_set ());
5855 reg = gen_reg_rtx (Pmode);
5856 emit_move_insn (reg, new);
5862 if (GET_CODE (addr) == CONST)
5864 addr = XEXP (addr, 0);
5866 /* We must match stuff we generate before. Assume the only
5867 unspecs that can get here are ours. Not that we could do
5868 anything with them anyway... */
5869 if (GET_CODE (addr) == UNSPEC
5870 || (GET_CODE (addr) == PLUS
5871 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5873 if (GET_CODE (addr) != PLUS)
5876 if (GET_CODE (addr) == PLUS)
5878 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5880 /* Check first to see if this is a constant offset from a @GOTOFF
5881 symbol reference. */
5882 if (local_symbolic_operand (op0, Pmode)
5883 && GET_CODE (op1) == CONST_INT)
5887 if (reload_in_progress)
5888 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5889 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5891 new = gen_rtx_PLUS (Pmode, new, op1);
5892 new = gen_rtx_CONST (Pmode, new);
5893 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5897 emit_move_insn (reg, new);
5903 if (INTVAL (op1) < -16*1024*1024
5904 || INTVAL (op1) >= 16*1024*1024)
5905 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5910 base = legitimize_pic_address (XEXP (addr, 0), reg);
5911 new = legitimize_pic_address (XEXP (addr, 1),
5912 base == reg ? NULL_RTX : reg);
5914 if (GET_CODE (new) == CONST_INT)
5915 new = plus_constant (base, INTVAL (new));
5918 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5920 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5921 new = XEXP (new, 1);
5923 new = gen_rtx_PLUS (Pmode, base, new);
5932 ix86_encode_section_info (decl, first)
5934 int first ATTRIBUTE_UNUSED;
5936 bool local_p = (*targetm.binds_local_p) (decl);
5939 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5940 if (GET_CODE (rtl) != MEM)
5942 symbol = XEXP (rtl, 0);
5943 if (GET_CODE (symbol) != SYMBOL_REF)
5946 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5947 symbol so that we may access it directly in the GOT. */
5950 SYMBOL_REF_FLAG (symbol) = local_p;
5952 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5953 "local dynamic", "initial exec" or "local exec" TLS models
5956 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5958 const char *symbol_str;
5961 enum tls_model kind = decl_tls_model (decl);
5963 if (TARGET_64BIT && ! flag_pic)
5965 /* x86-64 doesn't allow non-pic code for shared libraries,
5966 so don't generate GD/LD TLS models for non-pic code. */
5969 case TLS_MODEL_GLOBAL_DYNAMIC:
5970 kind = TLS_MODEL_INITIAL_EXEC; break;
5971 case TLS_MODEL_LOCAL_DYNAMIC:
5972 kind = TLS_MODEL_LOCAL_EXEC; break;
5978 symbol_str = XSTR (symbol, 0);
5980 if (symbol_str[0] == '%')
5982 if (symbol_str[1] == tls_model_chars[kind])
5986 len = strlen (symbol_str) + 1;
5987 newstr = alloca (len + 2);
5990 newstr[1] = tls_model_chars[kind];
5991 memcpy (newstr + 2, symbol_str, len);
5993 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5997 /* Undo the above when printing symbol names. */
6000 ix86_strip_name_encoding (str)
6010 /* Load the thread pointer into a register. */
6013 get_thread_pointer ()
6017 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6018 tp = gen_rtx_MEM (Pmode, tp);
6019 RTX_UNCHANGING_P (tp) = 1;
6020 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6021 tp = force_reg (Pmode, tp);
6026 /* Try machine-dependent ways of modifying an illegitimate address
6027 to be legitimate. If we find one, return the new, valid address.
6028 This macro is used in only one place: `memory_address' in explow.c.
6030 OLDX is the address as it was before break_out_memory_refs was called.
6031 In some cases it is useful to look at this to decide what needs to be done.
6033 MODE and WIN are passed so that this macro can use
6034 GO_IF_LEGITIMATE_ADDRESS.
6036 It is always safe for this macro to do nothing. It exists to recognize
6037 opportunities to optimize the output.
6039 For the 80386, we handle X+REG by loading X into a register R and
6040 using R+REG. R will go in a general reg and indexing will be used.
6041 However, if REG is a broken-out memory address or multiplication,
6042 nothing needs to be done because REG can certainly go in a general reg.
6044 When -fpic is used, special handling is needed for symbolic references.
6045 See comments by legitimize_pic_address in i386.c for details. */
6048 legitimize_address (x, oldx, mode)
6050 register rtx oldx ATTRIBUTE_UNUSED;
6051 enum machine_mode mode;
6056 if (TARGET_DEBUG_ADDR)
6058 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6059 GET_MODE_NAME (mode));
6063 log = tls_symbolic_operand (x, mode);
6066 rtx dest, base, off, pic;
6071 case TLS_MODEL_GLOBAL_DYNAMIC:
6072 dest = gen_reg_rtx (Pmode);
6075 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6078 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6079 insns = get_insns ();
6082 emit_libcall_block (insns, dest, rax, x);
6085 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6088 case TLS_MODEL_LOCAL_DYNAMIC:
6089 base = gen_reg_rtx (Pmode);
6092 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6095 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6096 insns = get_insns ();
6099 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6100 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6101 emit_libcall_block (insns, base, rax, note);
6104 emit_insn (gen_tls_local_dynamic_base_32 (base));
6106 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6107 off = gen_rtx_CONST (Pmode, off);
6109 return gen_rtx_PLUS (Pmode, base, off);
6111 case TLS_MODEL_INITIAL_EXEC:
6115 type = UNSPEC_GOTNTPOFF;
6119 if (reload_in_progress)
6120 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6121 pic = pic_offset_table_rtx;
6122 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6124 else if (!TARGET_GNU_TLS)
6126 pic = gen_reg_rtx (Pmode);
6127 emit_insn (gen_set_got (pic));
6128 type = UNSPEC_GOTTPOFF;
6133 type = UNSPEC_INDNTPOFF;
6136 base = get_thread_pointer ();
6138 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6139 off = gen_rtx_CONST (Pmode, off);
6141 off = gen_rtx_PLUS (Pmode, pic, off);
6142 off = gen_rtx_MEM (Pmode, off);
6143 RTX_UNCHANGING_P (off) = 1;
6144 set_mem_alias_set (off, ix86_GOT_alias_set ());
6145 dest = gen_reg_rtx (Pmode);
6147 if (TARGET_64BIT || TARGET_GNU_TLS)
6149 emit_move_insn (dest, off);
6150 return gen_rtx_PLUS (Pmode, base, dest);
6153 emit_insn (gen_subsi3 (dest, base, off));
6156 case TLS_MODEL_LOCAL_EXEC:
6157 base = get_thread_pointer ();
6159 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6160 (TARGET_64BIT || TARGET_GNU_TLS)
6161 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6162 off = gen_rtx_CONST (Pmode, off);
6164 if (TARGET_64BIT || TARGET_GNU_TLS)
6165 return gen_rtx_PLUS (Pmode, base, off);
6168 dest = gen_reg_rtx (Pmode);
6169 emit_insn (gen_subsi3 (dest, base, off));
6180 if (flag_pic && SYMBOLIC_CONST (x))
6181 return legitimize_pic_address (x, 0);
6183 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6184 if (GET_CODE (x) == ASHIFT
6185 && GET_CODE (XEXP (x, 1)) == CONST_INT
6186 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6189 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6190 GEN_INT (1 << log));
6193 if (GET_CODE (x) == PLUS)
6195 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6197 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6198 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6199 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6202 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6203 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6204 GEN_INT (1 << log));
6207 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6208 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6209 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6212 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6213 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6214 GEN_INT (1 << log));
6217 /* Put multiply first if it isn't already. */
6218 if (GET_CODE (XEXP (x, 1)) == MULT)
6220 rtx tmp = XEXP (x, 0);
6221 XEXP (x, 0) = XEXP (x, 1);
6226 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6227 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6228 created by virtual register instantiation, register elimination, and
6229 similar optimizations. */
6230 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6233 x = gen_rtx_PLUS (Pmode,
6234 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6235 XEXP (XEXP (x, 1), 0)),
6236 XEXP (XEXP (x, 1), 1));
6240 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6241 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6242 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6243 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6244 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6245 && CONSTANT_P (XEXP (x, 1)))
6248 rtx other = NULL_RTX;
6250 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6252 constant = XEXP (x, 1);
6253 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6255 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6257 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6258 other = XEXP (x, 1);
6266 x = gen_rtx_PLUS (Pmode,
6267 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6268 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6269 plus_constant (other, INTVAL (constant)));
6273 if (changed && legitimate_address_p (mode, x, FALSE))
6276 if (GET_CODE (XEXP (x, 0)) == MULT)
6279 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6282 if (GET_CODE (XEXP (x, 1)) == MULT)
6285 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6289 && GET_CODE (XEXP (x, 1)) == REG
6290 && GET_CODE (XEXP (x, 0)) == REG)
6293 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6296 x = legitimize_pic_address (x, 0);
6299 if (changed && legitimate_address_p (mode, x, FALSE))
6302 if (GET_CODE (XEXP (x, 0)) == REG)
6304 register rtx temp = gen_reg_rtx (Pmode);
6305 register rtx val = force_operand (XEXP (x, 1), temp);
6307 emit_move_insn (temp, val);
6313 else if (GET_CODE (XEXP (x, 1)) == REG)
6315 register rtx temp = gen_reg_rtx (Pmode);
6316 register rtx val = force_operand (XEXP (x, 0), temp);
6318 emit_move_insn (temp, val);
6328 /* Print an integer constant expression in assembler syntax. Addition
6329 and subtraction are the only arithmetic that may appear in these
6330 expressions. FILE is the stdio stream to write to, X is the rtx, and
6331 CODE is the operand print code from the output string. */
6334 output_pic_addr_const (file, x, code)
6341 switch (GET_CODE (x))
6351 assemble_name (file, XSTR (x, 0));
6352 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6353 fputs ("@PLT", file);
6360 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6361 assemble_name (asm_out_file, buf);
6365 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6369 /* This used to output parentheses around the expression,
6370 but that does not work on the 386 (either ATT or BSD assembler). */
6371 output_pic_addr_const (file, XEXP (x, 0), code);
6375 if (GET_MODE (x) == VOIDmode)
6377 /* We can use %d if the number is <32 bits and positive. */
6378 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6379 fprintf (file, "0x%lx%08lx",
6380 (unsigned long) CONST_DOUBLE_HIGH (x),
6381 (unsigned long) CONST_DOUBLE_LOW (x));
6383 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6386 /* We can't handle floating point constants;
6387 PRINT_OPERAND must handle them. */
6388 output_operand_lossage ("floating constant misused");
6392 /* Some assemblers need integer constants to appear first. */
6393 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6395 output_pic_addr_const (file, XEXP (x, 0), code);
6397 output_pic_addr_const (file, XEXP (x, 1), code);
6399 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6401 output_pic_addr_const (file, XEXP (x, 1), code);
6403 output_pic_addr_const (file, XEXP (x, 0), code);
6411 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6412 output_pic_addr_const (file, XEXP (x, 0), code);
6414 output_pic_addr_const (file, XEXP (x, 1), code);
6416 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6420 if (XVECLEN (x, 0) != 1)
6422 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6423 switch (XINT (x, 1))
6426 fputs ("@GOT", file);
6429 fputs ("@GOTOFF", file);
6431 case UNSPEC_GOTPCREL:
6432 fputs ("@GOTPCREL(%rip)", file);
6434 case UNSPEC_GOTTPOFF:
6435 /* FIXME: This might be @TPOFF in Sun ld too. */
6436 fputs ("@GOTTPOFF", file);
6439 fputs ("@TPOFF", file);
6443 fputs ("@TPOFF", file);
6445 fputs ("@NTPOFF", file);
6448 fputs ("@DTPOFF", file);
6450 case UNSPEC_GOTNTPOFF:
6452 fputs ("@GOTTPOFF(%rip)", file);
6454 fputs ("@GOTNTPOFF", file);
6456 case UNSPEC_INDNTPOFF:
6457 fputs ("@INDNTPOFF", file);
6460 output_operand_lossage ("invalid UNSPEC as operand");
6466 output_operand_lossage ("invalid expression as operand");
6470 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6471 We need to handle our special PIC relocations. */
6474 i386_dwarf_output_addr_const (file, x)
6479 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6483 fprintf (file, "%s", ASM_LONG);
6486 output_pic_addr_const (file, x, '\0');
6488 output_addr_const (file, x);
6492 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6493 We need to emit DTP-relative relocations. */
6496 i386_output_dwarf_dtprel (file, size, x)
6501 fputs (ASM_LONG, file);
6502 output_addr_const (file, x);
6503 fputs ("@DTPOFF", file);
6509 fputs (", 0", file);
6516 /* In the name of slightly smaller debug output, and to cater to
6517 general assembler losage, recognize PIC+GOTOFF and turn it back
6518 into a direct symbol reference. */
6521 i386_simplify_dwarf_addr (orig_x)
6526 if (GET_CODE (x) == MEM)
6531 if (GET_CODE (x) != CONST
6532 || GET_CODE (XEXP (x, 0)) != UNSPEC
6533 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6534 || GET_CODE (orig_x) != MEM)
6536 return XVECEXP (XEXP (x, 0), 0, 0);
6539 if (GET_CODE (x) != PLUS
6540 || GET_CODE (XEXP (x, 1)) != CONST)
6543 if (GET_CODE (XEXP (x, 0)) == REG
6544 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6545 /* %ebx + GOT/GOTOFF */
6547 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6549 /* %ebx + %reg * scale + GOT/GOTOFF */
6551 if (GET_CODE (XEXP (y, 0)) == REG
6552 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6554 else if (GET_CODE (XEXP (y, 1)) == REG
6555 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6559 if (GET_CODE (y) != REG
6560 && GET_CODE (y) != MULT
6561 && GET_CODE (y) != ASHIFT)
6567 x = XEXP (XEXP (x, 1), 0);
6568 if (GET_CODE (x) == UNSPEC
6569 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6570 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6573 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6574 return XVECEXP (x, 0, 0);
6577 if (GET_CODE (x) == PLUS
6578 && GET_CODE (XEXP (x, 0)) == UNSPEC
6579 && GET_CODE (XEXP (x, 1)) == CONST_INT
6580 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6581 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6582 && GET_CODE (orig_x) != MEM)))
6584 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6586 return gen_rtx_PLUS (Pmode, y, x);
6594 put_condition_code (code, mode, reverse, fp, file)
6596 enum machine_mode mode;
6602 if (mode == CCFPmode || mode == CCFPUmode)
6604 enum rtx_code second_code, bypass_code;
6605 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6606 if (bypass_code != NIL || second_code != NIL)
6608 code = ix86_fp_compare_code_to_integer (code);
6612 code = reverse_condition (code);
6623 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6628 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6629 Those same assemblers have the same but opposite losage on cmov. */
6632 suffix = fp ? "nbe" : "a";
6635 if (mode == CCNOmode || mode == CCGOCmode)
6637 else if (mode == CCmode || mode == CCGCmode)
6648 if (mode == CCNOmode || mode == CCGOCmode)
6650 else if (mode == CCmode || mode == CCGCmode)
6659 suffix = fp ? "nb" : "ae";
6662 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6672 suffix = fp ? "u" : "p";
6675 suffix = fp ? "nu" : "np";
6680 fputs (suffix, file);
6684 print_reg (x, code, file)
6689 if (REGNO (x) == ARG_POINTER_REGNUM
6690 || REGNO (x) == FRAME_POINTER_REGNUM
6691 || REGNO (x) == FLAGS_REG
6692 || REGNO (x) == FPSR_REG)
6695 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6698 if (code == 'w' || MMX_REG_P (x))
6700 else if (code == 'b')
6702 else if (code == 'k')
6704 else if (code == 'q')
6706 else if (code == 'y')
6708 else if (code == 'h')
6711 code = GET_MODE_SIZE (GET_MODE (x));
6713 /* Irritatingly, AMD extended registers use different naming convention
6714 from the normal registers. */
6715 if (REX_INT_REG_P (x))
6722 error ("extended registers have no high halves");
6725 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6728 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6731 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6734 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6737 error ("unsupported operand size for extended register");
6745 if (STACK_TOP_P (x))
6747 fputs ("st(0)", file);
6754 if (! ANY_FP_REG_P (x))
6755 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6759 fputs (hi_reg_name[REGNO (x)], file);
6762 fputs (qi_reg_name[REGNO (x)], file);
6765 fputs (qi_high_reg_name[REGNO (x)], file);
6772 /* Locate some local-dynamic symbol still in use by this function
6773 so that we can print its name in some tls_local_dynamic_base
6777 get_some_local_dynamic_name ()
6781 if (cfun->machine->some_ld_name)
6782 return cfun->machine->some_ld_name;
6784 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6786 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6787 return cfun->machine->some_ld_name;
6793 get_some_local_dynamic_name_1 (px, data)
6795 void *data ATTRIBUTE_UNUSED;
6799 if (GET_CODE (x) == SYMBOL_REF
6800 && local_dynamic_symbolic_operand (x, Pmode))
6802 cfun->machine->some_ld_name = XSTR (x, 0);
6810 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6811 C -- print opcode suffix for set/cmov insn.
6812 c -- like C, but print reversed condition
6813 F,f -- likewise, but for floating-point.
6814 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6816 R -- print the prefix for register names.
6817 z -- print the opcode suffix for the size of the current operand.
6818 * -- print a star (in certain assembler syntax)
6819 A -- print an absolute memory reference.
6820 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6821 s -- print a shift double count, followed by the assemblers argument
6823 b -- print the QImode name of the register for the indicated operand.
6824 %b0 would print %al if operands[0] is reg 0.
6825 w -- likewise, print the HImode name of the register.
6826 k -- likewise, print the SImode name of the register.
6827 q -- likewise, print the DImode name of the register.
6828 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6829 y -- print "st(0)" instead of "st" as a register.
6830 D -- print condition for SSE cmp instruction.
6831 P -- if PIC, print an @PLT suffix.
6832 X -- don't print any sort of PIC '@' suffix for a symbol.
6833 & -- print some in-use local-dynamic symbol name.
6837 print_operand (file, x, code)
6847 if (ASSEMBLER_DIALECT == ASM_ATT)
6852 assemble_name (file, get_some_local_dynamic_name ());
6856 if (ASSEMBLER_DIALECT == ASM_ATT)
6858 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6860 /* Intel syntax. For absolute addresses, registers should not
6861 be surrounded by braces. */
6862 if (GET_CODE (x) != REG)
6865 PRINT_OPERAND (file, x, 0);
6873 PRINT_OPERAND (file, x, 0);
6878 if (ASSEMBLER_DIALECT == ASM_ATT)
6883 if (ASSEMBLER_DIALECT == ASM_ATT)
6888 if (ASSEMBLER_DIALECT == ASM_ATT)
6893 if (ASSEMBLER_DIALECT == ASM_ATT)
6898 if (ASSEMBLER_DIALECT == ASM_ATT)
6903 if (ASSEMBLER_DIALECT == ASM_ATT)
6908 /* 387 opcodes don't get size suffixes if the operands are
6910 if (STACK_REG_P (x))
6913 /* Likewise if using Intel opcodes. */
6914 if (ASSEMBLER_DIALECT == ASM_INTEL)
6917 /* This is the size of op from size of operand. */
6918 switch (GET_MODE_SIZE (GET_MODE (x)))
6921 #ifdef HAVE_GAS_FILDS_FISTS
6927 if (GET_MODE (x) == SFmode)
6942 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6944 #ifdef GAS_MNEMONICS
6970 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6972 PRINT_OPERAND (file, x, 0);
6978 /* Little bit of braindamage here. The SSE compare instructions
6979 does use completely different names for the comparisons that the
6980 fp conditional moves. */
6981 switch (GET_CODE (x))
6996 fputs ("unord", file);
7000 fputs ("neq", file);
7004 fputs ("nlt", file);
7008 fputs ("nle", file);
7011 fputs ("ord", file);
7019 #ifdef CMOV_SUN_AS_SYNTAX
7020 if (ASSEMBLER_DIALECT == ASM_ATT)
7022 switch (GET_MODE (x))
7024 case HImode: putc ('w', file); break;
7026 case SFmode: putc ('l', file); break;
7028 case DFmode: putc ('q', file); break;
7036 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7039 #ifdef CMOV_SUN_AS_SYNTAX
7040 if (ASSEMBLER_DIALECT == ASM_ATT)
7043 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7046 /* Like above, but reverse condition */
7048 /* Check to see if argument to %c is really a constant
7049 and not a condition code which needs to be reversed. */
7050 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7052 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7055 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7058 #ifdef CMOV_SUN_AS_SYNTAX
7059 if (ASSEMBLER_DIALECT == ASM_ATT)
7062 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7068 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7071 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7074 int pred_val = INTVAL (XEXP (x, 0));
7076 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7077 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7079 int taken = pred_val > REG_BR_PROB_BASE / 2;
7080 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7082 /* Emit hints only in the case default branch prediction
7083 heruistics would fail. */
7084 if (taken != cputaken)
7086 /* We use 3e (DS) prefix for taken branches and
7087 2e (CS) prefix for not taken branches. */
7089 fputs ("ds ; ", file);
7091 fputs ("cs ; ", file);
7098 output_operand_lossage ("invalid operand code `%c'", code);
7102 if (GET_CODE (x) == REG)
7104 PRINT_REG (x, code, file);
7107 else if (GET_CODE (x) == MEM)
7109 /* No `byte ptr' prefix for call instructions. */
7110 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7113 switch (GET_MODE_SIZE (GET_MODE (x)))
7115 case 1: size = "BYTE"; break;
7116 case 2: size = "WORD"; break;
7117 case 4: size = "DWORD"; break;
7118 case 8: size = "QWORD"; break;
7119 case 12: size = "XWORD"; break;
7120 case 16: size = "XMMWORD"; break;
7125 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7128 else if (code == 'w')
7130 else if (code == 'k')
7134 fputs (" PTR ", file);
7138 if (flag_pic && CONSTANT_ADDRESS_P (x))
7139 output_pic_addr_const (file, x, code);
7140 /* Avoid (%rip) for call operands. */
7141 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7142 && GET_CODE (x) != CONST_INT)
7143 output_addr_const (file, x);
7144 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7145 output_operand_lossage ("invalid constraints for operand");
7150 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7155 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7156 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7158 if (ASSEMBLER_DIALECT == ASM_ATT)
7160 fprintf (file, "0x%lx", l);
7163 /* These float cases don't actually occur as immediate operands. */
7164 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7168 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7169 fprintf (file, "%s", dstr);
7172 else if (GET_CODE (x) == CONST_DOUBLE
7173 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7177 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7178 fprintf (file, "%s", dstr);
7185 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7187 if (ASSEMBLER_DIALECT == ASM_ATT)
7190 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7191 || GET_CODE (x) == LABEL_REF)
7193 if (ASSEMBLER_DIALECT == ASM_ATT)
7196 fputs ("OFFSET FLAT:", file);
7199 if (GET_CODE (x) == CONST_INT)
7200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7202 output_pic_addr_const (file, x, code);
7204 output_addr_const (file, x);
7208 /* Print a memory operand whose address is ADDR. */
7211 print_operand_address (file, addr)
7215 struct ix86_address parts;
7216 rtx base, index, disp;
7219 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7221 if (ASSEMBLER_DIALECT == ASM_INTEL)
7222 fputs ("DWORD PTR ", file);
7223 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7226 fputs ("fs:0", file);
7228 fputs ("gs:0", file);
7232 if (! ix86_decompose_address (addr, &parts))
7236 index = parts.index;
7238 scale = parts.scale;
7240 if (!base && !index)
7242 /* Displacement only requires special attention. */
7244 if (GET_CODE (disp) == CONST_INT)
7246 if (ASSEMBLER_DIALECT == ASM_INTEL)
7248 if (USER_LABEL_PREFIX[0] == 0)
7250 fputs ("ds:", file);
7252 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7255 output_pic_addr_const (file, addr, 0);
7257 output_addr_const (file, addr);
7259 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7261 && ((GET_CODE (addr) == SYMBOL_REF
7262 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7263 || GET_CODE (addr) == LABEL_REF
7264 || (GET_CODE (addr) == CONST
7265 && GET_CODE (XEXP (addr, 0)) == PLUS
7266 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7267 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7268 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7269 fputs ("(%rip)", file);
7273 if (ASSEMBLER_DIALECT == ASM_ATT)
7278 output_pic_addr_const (file, disp, 0);
7279 else if (GET_CODE (disp) == LABEL_REF)
7280 output_asm_label (disp);
7282 output_addr_const (file, disp);
7287 PRINT_REG (base, 0, file);
7291 PRINT_REG (index, 0, file);
7293 fprintf (file, ",%d", scale);
7299 rtx offset = NULL_RTX;
7303 /* Pull out the offset of a symbol; print any symbol itself. */
7304 if (GET_CODE (disp) == CONST
7305 && GET_CODE (XEXP (disp, 0)) == PLUS
7306 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7308 offset = XEXP (XEXP (disp, 0), 1);
7309 disp = gen_rtx_CONST (VOIDmode,
7310 XEXP (XEXP (disp, 0), 0));
7314 output_pic_addr_const (file, disp, 0);
7315 else if (GET_CODE (disp) == LABEL_REF)
7316 output_asm_label (disp);
7317 else if (GET_CODE (disp) == CONST_INT)
7320 output_addr_const (file, disp);
7326 PRINT_REG (base, 0, file);
7329 if (INTVAL (offset) >= 0)
7331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7335 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7342 PRINT_REG (index, 0, file);
7344 fprintf (file, "*%d", scale);
7352 output_addr_const_extra (file, x)
7358 if (GET_CODE (x) != UNSPEC)
7361 op = XVECEXP (x, 0, 0);
7362 switch (XINT (x, 1))
7364 case UNSPEC_GOTTPOFF:
7365 output_addr_const (file, op);
7366 /* FIXME: This might be @TPOFF in Sun ld. */
7367 fputs ("@GOTTPOFF", file);
7370 output_addr_const (file, op);
7371 fputs ("@TPOFF", file);
7374 output_addr_const (file, op);
7376 fputs ("@TPOFF", file);
7378 fputs ("@NTPOFF", file);
7381 output_addr_const (file, op);
7382 fputs ("@DTPOFF", file);
7384 case UNSPEC_GOTNTPOFF:
7385 output_addr_const (file, op);
7387 fputs ("@GOTTPOFF(%rip)", file);
7389 fputs ("@GOTNTPOFF", file);
7391 case UNSPEC_INDNTPOFF:
7392 output_addr_const (file, op);
7393 fputs ("@INDNTPOFF", file);
7403 /* Split one or more DImode RTL references into pairs of SImode
7404 references. The RTL can be REG, offsettable MEM, integer constant, or
7405 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7406 split and "num" is its length. lo_half and hi_half are output arrays
7407 that parallel "operands". */
7410 split_di (operands, num, lo_half, hi_half)
7413 rtx lo_half[], hi_half[];
7417 rtx op = operands[num];
7419 /* simplify_subreg refuse to split volatile memory addresses,
7420 but we still have to handle it. */
7421 if (GET_CODE (op) == MEM)
7423 lo_half[num] = adjust_address (op, SImode, 0);
7424 hi_half[num] = adjust_address (op, SImode, 4);
7428 lo_half[num] = simplify_gen_subreg (SImode, op,
7429 GET_MODE (op) == VOIDmode
7430 ? DImode : GET_MODE (op), 0);
7431 hi_half[num] = simplify_gen_subreg (SImode, op,
7432 GET_MODE (op) == VOIDmode
7433 ? DImode : GET_MODE (op), 4);
7437 /* Split one or more TImode RTL references into pairs of SImode
7438 references. The RTL can be REG, offsettable MEM, integer constant, or
7439 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7440 split and "num" is its length. lo_half and hi_half are output arrays
7441 that parallel "operands". */
7444 split_ti (operands, num, lo_half, hi_half)
7447 rtx lo_half[], hi_half[];
7451 rtx op = operands[num];
7453 /* simplify_subreg refuse to split volatile memory addresses, but we
7454 still have to handle it. */
7455 if (GET_CODE (op) == MEM)
7457 lo_half[num] = adjust_address (op, DImode, 0);
7458 hi_half[num] = adjust_address (op, DImode, 8);
7462 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7463 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7468 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7469 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7470 is the expression of the binary operation. The output may either be
7471 emitted here, or returned to the caller, like all output_* functions.
7473 There is no guarantee that the operands are the same mode, as they
7474 might be within FLOAT or FLOAT_EXTEND expressions. */
7476 #ifndef SYSV386_COMPAT
7477 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7478 wants to fix the assemblers because that causes incompatibility
7479 with gcc. No-one wants to fix gcc because that causes
7480 incompatibility with assemblers... You can use the option of
7481 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7482 #define SYSV386_COMPAT 1
7486 output_387_binary_op (insn, operands)
7490 static char buf[30];
7493 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7495 #ifdef ENABLE_CHECKING
7496 /* Even if we do not want to check the inputs, this documents input
7497 constraints. Which helps in understanding the following code. */
7498 if (STACK_REG_P (operands[0])
7499 && ((REG_P (operands[1])
7500 && REGNO (operands[0]) == REGNO (operands[1])
7501 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7502 || (REG_P (operands[2])
7503 && REGNO (operands[0]) == REGNO (operands[2])
7504 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7505 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7511 switch (GET_CODE (operands[3]))
7514 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7515 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7523 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7524 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7532 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7533 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7541 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7542 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7556 if (GET_MODE (operands[0]) == SFmode)
7557 strcat (buf, "ss\t{%2, %0|%0, %2}");
7559 strcat (buf, "sd\t{%2, %0|%0, %2}");
7564 switch (GET_CODE (operands[3]))
7568 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7570 rtx temp = operands[2];
7571 operands[2] = operands[1];
7575 /* know operands[0] == operands[1]. */
7577 if (GET_CODE (operands[2]) == MEM)
7583 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7585 if (STACK_TOP_P (operands[0]))
7586 /* How is it that we are storing to a dead operand[2]?
7587 Well, presumably operands[1] is dead too. We can't
7588 store the result to st(0) as st(0) gets popped on this
7589 instruction. Instead store to operands[2] (which I
7590 think has to be st(1)). st(1) will be popped later.
7591 gcc <= 2.8.1 didn't have this check and generated
7592 assembly code that the Unixware assembler rejected. */
7593 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7595 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7599 if (STACK_TOP_P (operands[0]))
7600 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7602 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7607 if (GET_CODE (operands[1]) == MEM)
7613 if (GET_CODE (operands[2]) == MEM)
7619 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7622 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7623 derived assemblers, confusingly reverse the direction of
7624 the operation for fsub{r} and fdiv{r} when the
7625 destination register is not st(0). The Intel assembler
7626 doesn't have this brain damage. Read !SYSV386_COMPAT to
7627 figure out what the hardware really does. */
7628 if (STACK_TOP_P (operands[0]))
7629 p = "{p\t%0, %2|rp\t%2, %0}";
7631 p = "{rp\t%2, %0|p\t%0, %2}";
7633 if (STACK_TOP_P (operands[0]))
7634 /* As above for fmul/fadd, we can't store to st(0). */
7635 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7637 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7642 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7645 if (STACK_TOP_P (operands[0]))
7646 p = "{rp\t%0, %1|p\t%1, %0}";
7648 p = "{p\t%1, %0|rp\t%0, %1}";
7650 if (STACK_TOP_P (operands[0]))
7651 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7653 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7658 if (STACK_TOP_P (operands[0]))
7660 if (STACK_TOP_P (operands[1]))
7661 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7663 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7666 else if (STACK_TOP_P (operands[1]))
7669 p = "{\t%1, %0|r\t%0, %1}";
7671 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7677 p = "{r\t%2, %0|\t%0, %2}";
7679 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7692 /* Output code to initialize control word copies used by
7693 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7694 is set to control word rounding downwards. */
7696 emit_i387_cw_initialization (normal, round_down)
7697 rtx normal, round_down;
7699 rtx reg = gen_reg_rtx (HImode);
7701 emit_insn (gen_x86_fnstcw_1 (normal));
7702 emit_move_insn (reg, normal);
7703 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7705 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7707 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7708 emit_move_insn (round_down, reg);
7711 /* Output code for INSN to convert a float to a signed int. OPERANDS
7712 are the insn operands. The output may be [HSD]Imode and the input
7713 operand may be [SDX]Fmode. */
7716 output_fix_trunc (insn, operands)
7720 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7721 int dimode_p = GET_MODE (operands[0]) == DImode;
7723 /* Jump through a hoop or two for DImode, since the hardware has no
7724 non-popping instruction. We used to do this a different way, but
7725 that was somewhat fragile and broke with post-reload splitters. */
7726 if (dimode_p && !stack_top_dies)
7727 output_asm_insn ("fld\t%y1", operands);
7729 if (!STACK_TOP_P (operands[1]))
7732 if (GET_CODE (operands[0]) != MEM)
7735 output_asm_insn ("fldcw\t%3", operands);
7736 if (stack_top_dies || dimode_p)
7737 output_asm_insn ("fistp%z0\t%0", operands);
7739 output_asm_insn ("fist%z0\t%0", operands);
7740 output_asm_insn ("fldcw\t%2", operands);
7745 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7746 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7747 when fucom should be used. */
7750 output_fp_compare (insn, operands, eflags_p, unordered_p)
7753 int eflags_p, unordered_p;
7756 rtx cmp_op0 = operands[0];
7757 rtx cmp_op1 = operands[1];
7758 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7763 cmp_op1 = operands[2];
7767 if (GET_MODE (operands[0]) == SFmode)
7769 return "ucomiss\t{%1, %0|%0, %1}";
7771 return "comiss\t{%1, %0|%0, %y}";
7774 return "ucomisd\t{%1, %0|%0, %1}";
7776 return "comisd\t{%1, %0|%0, %y}";
7779 if (! STACK_TOP_P (cmp_op0))
7782 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7784 if (STACK_REG_P (cmp_op1)
7786 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7787 && REGNO (cmp_op1) != FIRST_STACK_REG)
7789 /* If both the top of the 387 stack dies, and the other operand
7790 is also a stack register that dies, then this must be a
7791 `fcompp' float compare */
7795 /* There is no double popping fcomi variant. Fortunately,
7796 eflags is immune from the fstp's cc clobbering. */
7798 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7800 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7808 return "fucompp\n\tfnstsw\t%0";
7810 return "fcompp\n\tfnstsw\t%0";
7823 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7825 static const char * const alt[24] =
7837 "fcomi\t{%y1, %0|%0, %y1}",
7838 "fcomip\t{%y1, %0|%0, %y1}",
7839 "fucomi\t{%y1, %0|%0, %y1}",
7840 "fucomip\t{%y1, %0|%0, %y1}",
7847 "fcom%z2\t%y2\n\tfnstsw\t%0",
7848 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7849 "fucom%z2\t%y2\n\tfnstsw\t%0",
7850 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7852 "ficom%z2\t%y2\n\tfnstsw\t%0",
7853 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7861 mask = eflags_p << 3;
7862 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7863 mask |= unordered_p << 1;
7864 mask |= stack_top_dies;
7877 ix86_output_addr_vec_elt (file, value)
7881 const char *directive = ASM_LONG;
7886 directive = ASM_QUAD;
7892 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7896 ix86_output_addr_diff_elt (file, value, rel)
7901 fprintf (file, "%s%s%d-%s%d\n",
7902 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7903 else if (HAVE_AS_GOTOFF_IN_DATA)
7904 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7906 else if (TARGET_MACHO)
7907 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7908 machopic_function_base_name () + 1);
7911 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7912 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7915 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7919 ix86_expand_clear (dest)
7924 /* We play register width games, which are only valid after reload. */
7925 if (!reload_completed)
7928 /* Avoid HImode and its attendant prefix byte. */
7929 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7930 dest = gen_rtx_REG (SImode, REGNO (dest));
7932 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7934 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7935 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7937 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7938 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7944 /* X is an unchanging MEM. If it is a constant pool reference, return
7945 the constant pool rtx, else NULL. */
7948 maybe_get_pool_constant (x)
7953 if (flag_pic && ! TARGET_64BIT)
7955 if (GET_CODE (x) != PLUS)
7957 if (XEXP (x, 0) != pic_offset_table_rtx)
7960 if (GET_CODE (x) != CONST)
7963 if (GET_CODE (x) != UNSPEC)
7965 if (XINT (x, 1) != UNSPEC_GOTOFF)
7967 x = XVECEXP (x, 0, 0);
7970 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7971 return get_pool_constant (x);
7977 ix86_expand_move (mode, operands)
7978 enum machine_mode mode;
7981 int strict = (reload_in_progress || reload_completed);
7982 rtx insn, op0, op1, tmp;
7987 if (tls_symbolic_operand (op1, Pmode))
7989 op1 = legitimize_address (op1, op1, VOIDmode);
7990 if (GET_CODE (op0) == MEM)
7992 tmp = gen_reg_rtx (mode);
7993 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7997 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8002 rtx temp = ((reload_in_progress
8003 || ((op0 && GET_CODE (op0) == REG)
8005 ? op0 : gen_reg_rtx (Pmode));
8006 op1 = machopic_indirect_data_reference (op1, temp);
8007 op1 = machopic_legitimize_pic_address (op1, mode,
8008 temp == op1 ? 0 : temp);
8012 if (MACHOPIC_INDIRECT)
8013 op1 = machopic_indirect_data_reference (op1, 0);
8017 insn = gen_rtx_SET (VOIDmode, op0, op1);
8021 #endif /* TARGET_MACHO */
8022 if (GET_CODE (op0) == MEM)
8023 op1 = force_reg (Pmode, op1);
8027 if (GET_CODE (temp) != REG)
8028 temp = gen_reg_rtx (Pmode);
8029 temp = legitimize_pic_address (op1, temp);
8037 if (GET_CODE (op0) == MEM
8038 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8039 || !push_operand (op0, mode))
8040 && GET_CODE (op1) == MEM)
8041 op1 = force_reg (mode, op1);
8043 if (push_operand (op0, mode)
8044 && ! general_no_elim_operand (op1, mode))
8045 op1 = copy_to_mode_reg (mode, op1);
8047 /* Force large constants in 64bit compilation into register
8048 to get them CSEed. */
8049 if (TARGET_64BIT && mode == DImode
8050 && immediate_operand (op1, mode)
8051 && !x86_64_zero_extended_value (op1)
8052 && !register_operand (op0, mode)
8053 && optimize && !reload_completed && !reload_in_progress)
8054 op1 = copy_to_mode_reg (mode, op1);
8056 if (FLOAT_MODE_P (mode))
8058 /* If we are loading a floating point constant to a register,
8059 force the value to memory now, since we'll get better code
8060 out the back end. */
8064 else if (GET_CODE (op1) == CONST_DOUBLE
8065 && register_operand (op0, mode))
8066 op1 = validize_mem (force_const_mem (mode, op1));
8070 insn = gen_rtx_SET (VOIDmode, op0, op1);
8076 ix86_expand_vector_move (mode, operands)
8077 enum machine_mode mode;
8080 /* Force constants other than zero into memory. We do not know how
8081 the instructions used to build constants modify the upper 64 bits
8082 of the register, once we have that information we may be able
8083 to handle some of them more efficiently. */
8084 if ((reload_in_progress | reload_completed) == 0
8085 && register_operand (operands[0], mode)
8086 && CONSTANT_P (operands[1]))
8087 operands[1] = force_const_mem (mode, operands[1]);
8089 /* Make operand1 a register if it isn't already. */
8091 && !register_operand (operands[0], mode)
8092 && !register_operand (operands[1], mode))
8094 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8095 emit_move_insn (operands[0], temp);
8099 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8102 /* Attempt to expand a binary operator. Make the expansion closer to the
8103 actual machine, then just general_operand, which will allow 3 separate
8104 memory references (one output, two input) in a single insn. */
8107 ix86_expand_binary_operator (code, mode, operands)
8109 enum machine_mode mode;
8112 int matching_memory;
8113 rtx src1, src2, dst, op, clob;
8119 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8120 if (GET_RTX_CLASS (code) == 'c'
8121 && (rtx_equal_p (dst, src2)
8122 || immediate_operand (src1, mode)))
8129 /* If the destination is memory, and we do not have matching source
8130 operands, do things in registers. */
8131 matching_memory = 0;
8132 if (GET_CODE (dst) == MEM)
8134 if (rtx_equal_p (dst, src1))
8135 matching_memory = 1;
8136 else if (GET_RTX_CLASS (code) == 'c'
8137 && rtx_equal_p (dst, src2))
8138 matching_memory = 2;
8140 dst = gen_reg_rtx (mode);
8143 /* Both source operands cannot be in memory. */
8144 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8146 if (matching_memory != 2)
8147 src2 = force_reg (mode, src2);
8149 src1 = force_reg (mode, src1);
8152 /* If the operation is not commutable, source 1 cannot be a constant
8153 or non-matching memory. */
8154 if ((CONSTANT_P (src1)
8155 || (!matching_memory && GET_CODE (src1) == MEM))
8156 && GET_RTX_CLASS (code) != 'c')
8157 src1 = force_reg (mode, src1);
8159 /* If optimizing, copy to regs to improve CSE */
8160 if (optimize && ! no_new_pseudos)
8162 if (GET_CODE (dst) == MEM)
8163 dst = gen_reg_rtx (mode);
8164 if (GET_CODE (src1) == MEM)
8165 src1 = force_reg (mode, src1);
8166 if (GET_CODE (src2) == MEM)
8167 src2 = force_reg (mode, src2);
8170 /* Emit the instruction. */
8172 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8173 if (reload_in_progress)
8175 /* Reload doesn't know about the flags register, and doesn't know that
8176 it doesn't want to clobber it. We can only do this with PLUS. */
8183 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8184 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8187 /* Fix up the destination if needed. */
8188 if (dst != operands[0])
8189 emit_move_insn (operands[0], dst);
8192 /* Return TRUE or FALSE depending on whether the binary operator meets the
8193 appropriate constraints. */
8196 ix86_binary_operator_ok (code, mode, operands)
8198 enum machine_mode mode ATTRIBUTE_UNUSED;
8201 /* Both source operands cannot be in memory. */
8202 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8204 /* If the operation is not commutable, source 1 cannot be a constant. */
8205 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8207 /* If the destination is memory, we must have a matching source operand. */
8208 if (GET_CODE (operands[0]) == MEM
8209 && ! (rtx_equal_p (operands[0], operands[1])
8210 || (GET_RTX_CLASS (code) == 'c'
8211 && rtx_equal_p (operands[0], operands[2]))))
8213 /* If the operation is not commutable and the source 1 is memory, we must
8214 have a matching destination. */
8215 if (GET_CODE (operands[1]) == MEM
8216 && GET_RTX_CLASS (code) != 'c'
8217 && ! rtx_equal_p (operands[0], operands[1]))
8222 /* Attempt to expand a unary operator. Make the expansion closer to the
8223 actual machine, then just general_operand, which will allow 2 separate
8224 memory references (one output, one input) in a single insn. */
8227 ix86_expand_unary_operator (code, mode, operands)
8229 enum machine_mode mode;
8232 int matching_memory;
8233 rtx src, dst, op, clob;
8238 /* If the destination is memory, and we do not have matching source
8239 operands, do things in registers. */
8240 matching_memory = 0;
8241 if (GET_CODE (dst) == MEM)
8243 if (rtx_equal_p (dst, src))
8244 matching_memory = 1;
8246 dst = gen_reg_rtx (mode);
8249 /* When source operand is memory, destination must match. */
8250 if (!matching_memory && GET_CODE (src) == MEM)
8251 src = force_reg (mode, src);
8253 /* If optimizing, copy to regs to improve CSE */
8254 if (optimize && ! no_new_pseudos)
8256 if (GET_CODE (dst) == MEM)
8257 dst = gen_reg_rtx (mode);
8258 if (GET_CODE (src) == MEM)
8259 src = force_reg (mode, src);
8262 /* Emit the instruction. */
8264 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8265 if (reload_in_progress || code == NOT)
8267 /* Reload doesn't know about the flags register, and doesn't know that
8268 it doesn't want to clobber it. */
8275 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8276 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8279 /* Fix up the destination if needed. */
8280 if (dst != operands[0])
8281 emit_move_insn (operands[0], dst);
8284 /* Return TRUE or FALSE depending on whether the unary operator meets the
8285 appropriate constraints. */
8288 ix86_unary_operator_ok (code, mode, operands)
8289 enum rtx_code code ATTRIBUTE_UNUSED;
8290 enum machine_mode mode ATTRIBUTE_UNUSED;
8291 rtx operands[2] ATTRIBUTE_UNUSED;
8293 /* If one of operands is memory, source and destination must match. */
8294 if ((GET_CODE (operands[0]) == MEM
8295 || GET_CODE (operands[1]) == MEM)
8296 && ! rtx_equal_p (operands[0], operands[1]))
8301 /* Return TRUE or FALSE depending on whether the first SET in INSN
8302 has source and destination with matching CC modes, and that the
8303 CC mode is at least as constrained as REQ_MODE. */
8306 ix86_match_ccmode (insn, req_mode)
8308 enum machine_mode req_mode;
8311 enum machine_mode set_mode;
8313 set = PATTERN (insn);
8314 if (GET_CODE (set) == PARALLEL)
8315 set = XVECEXP (set, 0, 0);
8316 if (GET_CODE (set) != SET)
8318 if (GET_CODE (SET_SRC (set)) != COMPARE)
8321 set_mode = GET_MODE (SET_DEST (set));
8325 if (req_mode != CCNOmode
8326 && (req_mode != CCmode
8327 || XEXP (SET_SRC (set), 1) != const0_rtx))
8331 if (req_mode == CCGCmode)
8335 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8339 if (req_mode == CCZmode)
8349 return (GET_MODE (SET_SRC (set)) == set_mode);
8352 /* Generate insn patterns to do an integer compare of OPERANDS. */
8355 ix86_expand_int_compare (code, op0, op1)
8359 enum machine_mode cmpmode;
8362 cmpmode = SELECT_CC_MODE (code, op0, op1);
8363 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8365 /* This is very simple, but making the interface the same as in the
8366 FP case makes the rest of the code easier. */
8367 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8368 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8370 /* Return the test that should be put into the flags user, i.e.
8371 the bcc, scc, or cmov instruction. */
8372 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8375 /* Figure out whether to use ordered or unordered fp comparisons.
8376 Return the appropriate mode to use. */
8379 ix86_fp_compare_mode (code)
8380 enum rtx_code code ATTRIBUTE_UNUSED;
8382 /* ??? In order to make all comparisons reversible, we do all comparisons
8383 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8384 all forms trapping and nontrapping comparisons, we can make inequality
8385 comparisons trapping again, since it results in better code when using
8386 FCOM based compares. */
8387 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8391 ix86_cc_mode (code, op0, op1)
8395 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8396 return ix86_fp_compare_mode (code);
8399 /* Only zero flag is needed. */
8401 case NE: /* ZF!=0 */
8403 /* Codes needing carry flag. */
8404 case GEU: /* CF=0 */
8405 case GTU: /* CF=0 & ZF=0 */
8406 case LTU: /* CF=1 */
8407 case LEU: /* CF=1 | ZF=1 */
8409 /* Codes possibly doable only with sign flag when
8410 comparing against zero. */
8411 case GE: /* SF=OF or SF=0 */
8412 case LT: /* SF<>OF or SF=1 */
8413 if (op1 == const0_rtx)
8416 /* For other cases Carry flag is not required. */
8418 /* Codes doable only with sign flag when comparing
8419 against zero, but we miss jump instruction for it
8420 so we need to use relational tests agains overflow
8421 that thus needs to be zero. */
8422 case GT: /* ZF=0 & SF=OF */
8423 case LE: /* ZF=1 | SF<>OF */
8424 if (op1 == const0_rtx)
8428 /* strcmp pattern do (use flags) and combine may ask us for proper
8437 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8440 ix86_use_fcomi_compare (code)
8441 enum rtx_code code ATTRIBUTE_UNUSED;
8443 enum rtx_code swapped_code = swap_condition (code);
8444 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8445 || (ix86_fp_comparison_cost (swapped_code)
8446 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8449 /* Swap, force into registers, or otherwise massage the two operands
8450 to a fp comparison. The operands are updated in place; the new
8451 comparsion code is returned. */
8453 static enum rtx_code
8454 ix86_prepare_fp_compare_args (code, pop0, pop1)
8458 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8459 rtx op0 = *pop0, op1 = *pop1;
8460 enum machine_mode op_mode = GET_MODE (op0);
8461 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8463 /* All of the unordered compare instructions only work on registers.
8464 The same is true of the XFmode compare instructions. The same is
8465 true of the fcomi compare instructions. */
8468 && (fpcmp_mode == CCFPUmode
8469 || op_mode == XFmode
8470 || op_mode == TFmode
8471 || ix86_use_fcomi_compare (code)))
8473 op0 = force_reg (op_mode, op0);
8474 op1 = force_reg (op_mode, op1);
8478 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8479 things around if they appear profitable, otherwise force op0
8482 if (standard_80387_constant_p (op0) == 0
8483 || (GET_CODE (op0) == MEM
8484 && ! (standard_80387_constant_p (op1) == 0
8485 || GET_CODE (op1) == MEM)))
8488 tmp = op0, op0 = op1, op1 = tmp;
8489 code = swap_condition (code);
8492 if (GET_CODE (op0) != REG)
8493 op0 = force_reg (op_mode, op0);
8495 if (CONSTANT_P (op1))
8497 if (standard_80387_constant_p (op1))
8498 op1 = force_reg (op_mode, op1);
8500 op1 = validize_mem (force_const_mem (op_mode, op1));
8504 /* Try to rearrange the comparison to make it cheaper. */
8505 if (ix86_fp_comparison_cost (code)
8506 > ix86_fp_comparison_cost (swap_condition (code))
8507 && (GET_CODE (op1) == REG || !no_new_pseudos))
8510 tmp = op0, op0 = op1, op1 = tmp;
8511 code = swap_condition (code);
8512 if (GET_CODE (op0) != REG)
8513 op0 = force_reg (op_mode, op0);
8521 /* Convert comparison codes we use to represent FP comparison to integer
8522 code that will result in proper branch. Return UNKNOWN if no such code
8524 static enum rtx_code
8525 ix86_fp_compare_code_to_integer (code)
8555 /* Split comparison code CODE into comparisons we can do using branch
8556 instructions. BYPASS_CODE is comparison code for branch that will
8557 branch around FIRST_CODE and SECOND_CODE. If some of branches
8558 is not required, set value to NIL.
8559 We never require more than two branches. */
8561 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8562 enum rtx_code code, *bypass_code, *first_code, *second_code;
8568 /* The fcomi comparison sets flags as follows:
8578 case GT: /* GTU - CF=0 & ZF=0 */
8579 case GE: /* GEU - CF=0 */
8580 case ORDERED: /* PF=0 */
8581 case UNORDERED: /* PF=1 */
8582 case UNEQ: /* EQ - ZF=1 */
8583 case UNLT: /* LTU - CF=1 */
8584 case UNLE: /* LEU - CF=1 | ZF=1 */
8585 case LTGT: /* EQ - ZF=0 */
8587 case LT: /* LTU - CF=1 - fails on unordered */
8589 *bypass_code = UNORDERED;
8591 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8593 *bypass_code = UNORDERED;
8595 case EQ: /* EQ - ZF=1 - fails on unordered */
8597 *bypass_code = UNORDERED;
8599 case NE: /* NE - ZF=0 - fails on unordered */
8601 *second_code = UNORDERED;
8603 case UNGE: /* GEU - CF=0 - fails on unordered */
8605 *second_code = UNORDERED;
8607 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8609 *second_code = UNORDERED;
8614 if (!TARGET_IEEE_FP)
8621 /* Return cost of comparison done fcom + arithmetics operations on AX.
8622 All following functions do use number of instructions as an cost metrics.
8623 In future this should be tweaked to compute bytes for optimize_size and
8624 take into account performance of various instructions on various CPUs. */
8626 ix86_fp_comparison_arithmetics_cost (code)
8629 if (!TARGET_IEEE_FP)
8631 /* The cost of code output by ix86_expand_fp_compare. */
8659 /* Return cost of comparison done using fcomi operation.
8660 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8662 ix86_fp_comparison_fcomi_cost (code)
8665 enum rtx_code bypass_code, first_code, second_code;
8666 /* Return arbitarily high cost when instruction is not supported - this
8667 prevents gcc from using it. */
8670 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8671 return (bypass_code != NIL || second_code != NIL) + 2;
8674 /* Return cost of comparison done using sahf operation.
8675 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8677 ix86_fp_comparison_sahf_cost (code)
8680 enum rtx_code bypass_code, first_code, second_code;
8681 /* Return arbitarily high cost when instruction is not preferred - this
8682 avoids gcc from using it. */
8683 if (!TARGET_USE_SAHF && !optimize_size)
8685 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8686 return (bypass_code != NIL || second_code != NIL) + 3;
8689 /* Compute cost of the comparison done using any method.
8690 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8692 ix86_fp_comparison_cost (code)
8695 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8698 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8699 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8701 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8702 if (min > sahf_cost)
8704 if (min > fcomi_cost)
8709 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8712 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8714 rtx op0, op1, scratch;
8718 enum machine_mode fpcmp_mode, intcmp_mode;
8720 int cost = ix86_fp_comparison_cost (code);
8721 enum rtx_code bypass_code, first_code, second_code;
8723 fpcmp_mode = ix86_fp_compare_mode (code);
8724 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8727 *second_test = NULL_RTX;
8729 *bypass_test = NULL_RTX;
8731 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8733 /* Do fcomi/sahf based test when profitable. */
8734 if ((bypass_code == NIL || bypass_test)
8735 && (second_code == NIL || second_test)
8736 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8740 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8741 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8747 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8748 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8750 scratch = gen_reg_rtx (HImode);
8751 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8752 emit_insn (gen_x86_sahf_1 (scratch));
8755 /* The FP codes work out to act like unsigned. */
8756 intcmp_mode = fpcmp_mode;
8758 if (bypass_code != NIL)
8759 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8760 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8762 if (second_code != NIL)
8763 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8764 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8769 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8770 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8771 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8773 scratch = gen_reg_rtx (HImode);
8774 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8776 /* In the unordered case, we have to check C2 for NaN's, which
8777 doesn't happen to work out to anything nice combination-wise.
8778 So do some bit twiddling on the value we've got in AH to come
8779 up with an appropriate set of condition codes. */
8781 intcmp_mode = CCNOmode;
8786 if (code == GT || !TARGET_IEEE_FP)
8788 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8793 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8794 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8795 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8796 intcmp_mode = CCmode;
8802 if (code == LT && TARGET_IEEE_FP)
8804 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8805 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8806 intcmp_mode = CCmode;
8811 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8817 if (code == GE || !TARGET_IEEE_FP)
8819 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8824 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8825 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8832 if (code == LE && TARGET_IEEE_FP)
8834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8835 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8837 intcmp_mode = CCmode;
8842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8848 if (code == EQ && TARGET_IEEE_FP)
8850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8851 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8852 intcmp_mode = CCmode;
8857 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8864 if (code == NE && TARGET_IEEE_FP)
8866 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8867 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8873 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8879 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8883 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8892 /* Return the test that should be put into the flags user, i.e.
8893 the bcc, scc, or cmov instruction. */
8894 return gen_rtx_fmt_ee (code, VOIDmode,
8895 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8900 ix86_expand_compare (code, second_test, bypass_test)
8902 rtx *second_test, *bypass_test;
8905 op0 = ix86_compare_op0;
8906 op1 = ix86_compare_op1;
8909 *second_test = NULL_RTX;
8911 *bypass_test = NULL_RTX;
8913 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8914 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8915 second_test, bypass_test);
8917 ret = ix86_expand_int_compare (code, op0, op1);
8922 /* Return true if the CODE will result in nontrivial jump sequence. */
8924 ix86_fp_jump_nontrivial_p (code)
8927 enum rtx_code bypass_code, first_code, second_code;
8930 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8931 return bypass_code != NIL || second_code != NIL;
8935 ix86_expand_branch (code, label)
8941 switch (GET_MODE (ix86_compare_op0))
8947 tmp = ix86_expand_compare (code, NULL, NULL);
8948 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8949 gen_rtx_LABEL_REF (VOIDmode, label),
8951 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8961 enum rtx_code bypass_code, first_code, second_code;
8963 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8966 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8968 /* Check whether we will use the natural sequence with one jump. If
8969 so, we can expand jump early. Otherwise delay expansion by
8970 creating compound insn to not confuse optimizers. */
8971 if (bypass_code == NIL && second_code == NIL
8974 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8975 gen_rtx_LABEL_REF (VOIDmode, label),
8980 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8981 ix86_compare_op0, ix86_compare_op1);
8982 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8983 gen_rtx_LABEL_REF (VOIDmode, label),
8985 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8987 use_fcomi = ix86_use_fcomi_compare (code);
8988 vec = rtvec_alloc (3 + !use_fcomi);
8989 RTVEC_ELT (vec, 0) = tmp;
8991 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8993 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8996 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8998 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9006 /* Expand DImode branch into multiple compare+branch. */
9008 rtx lo[2], hi[2], label2;
9009 enum rtx_code code1, code2, code3;
9011 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9013 tmp = ix86_compare_op0;
9014 ix86_compare_op0 = ix86_compare_op1;
9015 ix86_compare_op1 = tmp;
9016 code = swap_condition (code);
9018 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9019 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9021 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9022 avoid two branches. This costs one extra insn, so disable when
9023 optimizing for size. */
9025 if ((code == EQ || code == NE)
9027 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9032 if (hi[1] != const0_rtx)
9033 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9034 NULL_RTX, 0, OPTAB_WIDEN);
9037 if (lo[1] != const0_rtx)
9038 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9039 NULL_RTX, 0, OPTAB_WIDEN);
9041 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9042 NULL_RTX, 0, OPTAB_WIDEN);
9044 ix86_compare_op0 = tmp;
9045 ix86_compare_op1 = const0_rtx;
9046 ix86_expand_branch (code, label);
9050 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9051 op1 is a constant and the low word is zero, then we can just
9052 examine the high word. */
9054 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9057 case LT: case LTU: case GE: case GEU:
9058 ix86_compare_op0 = hi[0];
9059 ix86_compare_op1 = hi[1];
9060 ix86_expand_branch (code, label);
9066 /* Otherwise, we need two or three jumps. */
9068 label2 = gen_label_rtx ();
9071 code2 = swap_condition (code);
9072 code3 = unsigned_condition (code);
9076 case LT: case GT: case LTU: case GTU:
9079 case LE: code1 = LT; code2 = GT; break;
9080 case GE: code1 = GT; code2 = LT; break;
9081 case LEU: code1 = LTU; code2 = GTU; break;
9082 case GEU: code1 = GTU; code2 = LTU; break;
9084 case EQ: code1 = NIL; code2 = NE; break;
9085 case NE: code2 = NIL; break;
9093 * if (hi(a) < hi(b)) goto true;
9094 * if (hi(a) > hi(b)) goto false;
9095 * if (lo(a) < lo(b)) goto true;
9099 ix86_compare_op0 = hi[0];
9100 ix86_compare_op1 = hi[1];
9103 ix86_expand_branch (code1, label);
9105 ix86_expand_branch (code2, label2);
9107 ix86_compare_op0 = lo[0];
9108 ix86_compare_op1 = lo[1];
9109 ix86_expand_branch (code3, label);
9112 emit_label (label2);
9121 /* Split branch based on floating point condition. */
9123 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9125 rtx op1, op2, target1, target2, tmp;
9128 rtx label = NULL_RTX;
9130 int bypass_probability = -1, second_probability = -1, probability = -1;
9133 if (target2 != pc_rtx)
9136 code = reverse_condition_maybe_unordered (code);
9141 condition = ix86_expand_fp_compare (code, op1, op2,
9142 tmp, &second, &bypass);
9144 if (split_branch_probability >= 0)
9146 /* Distribute the probabilities across the jumps.
9147 Assume the BYPASS and SECOND to be always test
9149 probability = split_branch_probability;
9151 /* Value of 1 is low enough to make no need for probability
9152 to be updated. Later we may run some experiments and see
9153 if unordered values are more frequent in practice. */
9155 bypass_probability = 1;
9157 second_probability = 1;
9159 if (bypass != NULL_RTX)
9161 label = gen_label_rtx ();
9162 i = emit_jump_insn (gen_rtx_SET
9164 gen_rtx_IF_THEN_ELSE (VOIDmode,
9166 gen_rtx_LABEL_REF (VOIDmode,
9169 if (bypass_probability >= 0)
9171 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9172 GEN_INT (bypass_probability),
9175 i = emit_jump_insn (gen_rtx_SET
9177 gen_rtx_IF_THEN_ELSE (VOIDmode,
9178 condition, target1, target2)));
9179 if (probability >= 0)
9181 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9182 GEN_INT (probability),
9184 if (second != NULL_RTX)
9186 i = emit_jump_insn (gen_rtx_SET
9188 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9190 if (second_probability >= 0)
9192 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9193 GEN_INT (second_probability),
9196 if (label != NULL_RTX)
9201 ix86_expand_setcc (code, dest)
9205 rtx ret, tmp, tmpreg;
9206 rtx second_test, bypass_test;
9208 if (GET_MODE (ix86_compare_op0) == DImode
9210 return 0; /* FAIL */
9212 if (GET_MODE (dest) != QImode)
9215 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9216 PUT_MODE (ret, QImode);
9221 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9222 if (bypass_test || second_test)
9224 rtx test = second_test;
9226 rtx tmp2 = gen_reg_rtx (QImode);
9233 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9235 PUT_MODE (test, QImode);
9236 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9239 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9241 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9244 return 1; /* DONE */
9247 /* Expand comparison setting or clearing carry flag. Return true when sucesfull
9248 and set pop for the operation. */
9250 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9254 enum machine_mode mode =
9255 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9257 /* Do not handle DImode compares that go trought special path. Also we can't
9258 deal with FP compares yet. This is possible to add. */
9259 if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
9267 /* Convert a==0 into (unsigned)a<1. */
9270 if (op1 != const0_rtx)
9273 code = (code == EQ ? LTU : GEU);
9276 /* Convert a>b into b<a or a>=b-1. */
9279 if (GET_CODE (op1) == CONST_INT)
9281 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9282 /* Bail out on overflow. We still can swap operands but that
9283 would force loading of the constant into register. */
9284 if (op1 == const0_rtx
9285 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9287 code = (code == GTU ? GEU : LTU);
9294 code = (code == GTU ? LTU : GEU);
9298 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9301 if (mode == DImode || op1 != const0_rtx)
9303 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9304 code = (code == LT ? GEU : LTU);
9308 if (mode == DImode || op1 != constm1_rtx)
9310 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9311 code = (code == LE ? GEU : LTU);
9317 ix86_compare_op0 = op0;
9318 ix86_compare_op1 = op1;
9319 *pop = ix86_expand_compare (code, NULL, NULL);
9320 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9326 ix86_expand_int_movcc (operands)
9329 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9330 rtx compare_seq, compare_op;
9331 rtx second_test, bypass_test;
9332 enum machine_mode mode = GET_MODE (operands[0]);
9333 bool sign_bit_compare_p = false;;
9336 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9337 compare_seq = get_insns ();
9340 compare_code = GET_CODE (compare_op);
9342 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9343 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9344 sign_bit_compare_p = true;
9346 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9347 HImode insns, we'd be swallowed in word prefix ops. */
9349 if ((mode != HImode || TARGET_FAST_PREFIX)
9350 && (mode != DImode || TARGET_64BIT)
9351 && GET_CODE (operands[2]) == CONST_INT
9352 && GET_CODE (operands[3]) == CONST_INT)
9354 rtx out = operands[0];
9355 HOST_WIDE_INT ct = INTVAL (operands[2]);
9356 HOST_WIDE_INT cf = INTVAL (operands[3]);
9360 /* Sign bit compares are better done using shifts than we do by using
9362 if (sign_bit_compare_p
9363 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9364 ix86_compare_op1, &compare_op))
9366 /* Detect overlap between destination and compare sources. */
9369 if (!sign_bit_compare_p)
9371 compare_code = GET_CODE (compare_op);
9373 /* To simplify rest of code, restrict to the GEU case. */
9374 if (compare_code == LTU)
9376 HOST_WIDE_INT tmp = ct;
9379 compare_code = reverse_condition (compare_code);
9380 code = reverse_condition (code);
9384 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9385 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9386 tmp = gen_reg_rtx (mode);
9389 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9391 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
9395 if (code == GT || code == GE)
9396 code = reverse_condition (code);
9399 HOST_WIDE_INT tmp = ct;
9403 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9404 ix86_compare_op1, VOIDmode, 0, -1);
9417 tmp = expand_simple_binop (mode, PLUS,
9419 copy_rtx (tmp), 1, OPTAB_DIRECT);
9430 tmp = expand_simple_binop (mode, IOR,
9432 copy_rtx (tmp), 1, OPTAB_DIRECT);
9434 else if (diff == -1 && ct)
9444 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9446 tmp = expand_simple_binop (mode, PLUS,
9447 copy_rtx (tmp), GEN_INT (cf),
9448 copy_rtx (tmp), 1, OPTAB_DIRECT);
9456 * andl cf - ct, dest
9466 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9469 tmp = expand_simple_binop (mode, AND,
9471 gen_int_mode (cf - ct, mode),
9472 copy_rtx (tmp), 1, OPTAB_DIRECT);
9474 tmp = expand_simple_binop (mode, PLUS,
9475 copy_rtx (tmp), GEN_INT (ct),
9476 copy_rtx (tmp), 1, OPTAB_DIRECT);
9479 if (!rtx_equal_p (tmp, out))
9480 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9482 return 1; /* DONE */
9488 tmp = ct, ct = cf, cf = tmp;
9490 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9492 /* We may be reversing unordered compare to normal compare, that
9493 is not valid in general (we may convert non-trapping condition
9494 to trapping one), however on i386 we currently emit all
9495 comparisons unordered. */
9496 compare_code = reverse_condition_maybe_unordered (compare_code);
9497 code = reverse_condition_maybe_unordered (code);
9501 compare_code = reverse_condition (compare_code);
9502 code = reverse_condition (code);
9507 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9508 && GET_CODE (ix86_compare_op1) == CONST_INT)
9510 if (ix86_compare_op1 == const0_rtx
9511 && (code == LT || code == GE))
9512 compare_code = code;
9513 else if (ix86_compare_op1 == constm1_rtx)
9517 else if (code == GT)
9522 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9523 if (compare_code != NIL
9524 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9525 && (cf == -1 || ct == -1))
9527 /* If lea code below could be used, only optimize
9528 if it results in a 2 insn sequence. */
9530 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9531 || diff == 3 || diff == 5 || diff == 9)
9532 || (compare_code == LT && ct == -1)
9533 || (compare_code == GE && cf == -1))
9536 * notl op1 (if necessary)
9544 code = reverse_condition (code);
9547 out = emit_store_flag (out, code, ix86_compare_op0,
9548 ix86_compare_op1, VOIDmode, 0, -1);
9550 out = expand_simple_binop (mode, IOR,
9552 out, 1, OPTAB_DIRECT);
9553 if (out != operands[0])
9554 emit_move_insn (operands[0], out);
9556 return 1; /* DONE */
9561 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9562 || diff == 3 || diff == 5 || diff == 9)
9563 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9564 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9570 * lea cf(dest*(ct-cf)),dest
9574 * This also catches the degenerate setcc-only case.
9580 out = emit_store_flag (out, code, ix86_compare_op0,
9581 ix86_compare_op1, VOIDmode, 0, 1);
9584 /* On x86_64 the lea instruction operates on Pmode, so we need
9585 to get arithmetics done in proper mode to match. */
9592 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9596 tmp = gen_rtx_PLUS (mode, tmp, out1);
9602 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9605 if (!rtx_equal_p (tmp, out))
9608 out = force_operand (tmp, out);
9610 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9612 if (!rtx_equal_p (out, operands[0]))
9613 emit_move_insn (operands[0], copy_rtx (out));
9615 return 1; /* DONE */
9619 * General case: Jumpful:
9620 * xorl dest,dest cmpl op1, op2
9621 * cmpl op1, op2 movl ct, dest
9623 * decl dest movl cf, dest
9624 * andl (cf-ct),dest 1:
9629 * This is reasonably steep, but branch mispredict costs are
9630 * high on modern cpus, so consider failing only if optimizing
9634 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9635 && BRANCH_COST >= 2)
9641 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9642 /* We may be reversing unordered compare to normal compare,
9643 that is not valid in general (we may convert non-trapping
9644 condition to trapping one), however on i386 we currently
9645 emit all comparisons unordered. */
9646 code = reverse_condition_maybe_unordered (code);
9649 code = reverse_condition (code);
9650 if (compare_code != NIL)
9651 compare_code = reverse_condition (compare_code);
9655 if (compare_code != NIL)
9657 /* notl op1 (if needed)
9662 For x < 0 (resp. x <= -1) there will be no notl,
9663 so if possible swap the constants to get rid of the
9665 True/false will be -1/0 while code below (store flag
9666 followed by decrement) is 0/-1, so the constants need
9667 to be exchanged once more. */
9669 if (compare_code == GE || !cf)
9671 code = reverse_condition (code);
9676 HOST_WIDE_INT tmp = cf;
9681 out = emit_store_flag (out, code, ix86_compare_op0,
9682 ix86_compare_op1, VOIDmode, 0, -1);
9686 out = emit_store_flag (out, code, ix86_compare_op0,
9687 ix86_compare_op1, VOIDmode, 0, 1);
9689 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9690 copy_rtx (out), 1, OPTAB_DIRECT);
9693 out = expand_simple_binop (mode, AND, copy_rtx (out),
9694 gen_int_mode (cf - ct, mode),
9695 copy_rtx (out), 1, OPTAB_DIRECT);
9697 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9698 copy_rtx (out), 1, OPTAB_DIRECT);
9699 if (!rtx_equal_p (out, operands[0]))
9700 emit_move_insn (operands[0], copy_rtx (out));
9702 return 1; /* DONE */
9706 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9708 /* Try a few things more with specific constants and a variable. */
9711 rtx var, orig_out, out, tmp;
9713 if (BRANCH_COST <= 2)
9714 return 0; /* FAIL */
9716 /* If one of the two operands is an interesting constant, load a
9717 constant with the above and mask it in with a logical operation. */
9719 if (GET_CODE (operands[2]) == CONST_INT)
9722 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9723 operands[3] = constm1_rtx, op = and_optab;
9724 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9725 operands[3] = const0_rtx, op = ior_optab;
9727 return 0; /* FAIL */
9729 else if (GET_CODE (operands[3]) == CONST_INT)
9732 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9733 operands[2] = constm1_rtx, op = and_optab;
9734 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9735 operands[2] = const0_rtx, op = ior_optab;
9737 return 0; /* FAIL */
9740 return 0; /* FAIL */
9742 orig_out = operands[0];
9743 tmp = gen_reg_rtx (mode);
9746 /* Recurse to get the constant loaded. */
9747 if (ix86_expand_int_movcc (operands) == 0)
9748 return 0; /* FAIL */
9750 /* Mask in the interesting variable. */
9751 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9753 if (!rtx_equal_p (out, orig_out))
9754 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9756 return 1; /* DONE */
9760 * For comparison with above,
9770 if (! nonimmediate_operand (operands[2], mode))
9771 operands[2] = force_reg (mode, operands[2]);
9772 if (! nonimmediate_operand (operands[3], mode))
9773 operands[3] = force_reg (mode, operands[3]);
9775 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9777 rtx tmp = gen_reg_rtx (mode);
9778 emit_move_insn (tmp, operands[3]);
9781 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9783 rtx tmp = gen_reg_rtx (mode);
9784 emit_move_insn (tmp, operands[2]);
9788 if (! register_operand (operands[2], VOIDmode)
9790 || ! register_operand (operands[3], VOIDmode)))
9791 operands[2] = force_reg (mode, operands[2]);
9794 && ! register_operand (operands[3], VOIDmode))
9795 operands[3] = force_reg (mode, operands[3]);
9797 emit_insn (compare_seq);
9798 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9799 gen_rtx_IF_THEN_ELSE (mode,
9800 compare_op, operands[2],
9803 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9804 gen_rtx_IF_THEN_ELSE (mode,
9806 copy_rtx (operands[3]),
9807 copy_rtx (operands[0]))));
9809 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9810 gen_rtx_IF_THEN_ELSE (mode,
9812 copy_rtx (operands[2]),
9813 copy_rtx (operands[0]))));
9815 return 1; /* DONE */
9819 ix86_expand_fp_movcc (operands)
9824 rtx compare_op, second_test, bypass_test;
9826 /* For SF/DFmode conditional moves based on comparisons
9827 in same mode, we may want to use SSE min/max instructions. */
9828 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9829 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9830 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9831 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9833 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9834 /* We may be called from the post-reload splitter. */
9835 && (!REG_P (operands[0])
9836 || SSE_REG_P (operands[0])
9837 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9839 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9840 code = GET_CODE (operands[1]);
9842 /* See if we have (cross) match between comparison operands and
9843 conditional move operands. */
9844 if (rtx_equal_p (operands[2], op1))
9849 code = reverse_condition_maybe_unordered (code);
9851 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9853 /* Check for min operation. */
9854 if (code == LT || code == UNLE)
9862 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9863 if (memory_operand (op0, VOIDmode))
9864 op0 = force_reg (GET_MODE (operands[0]), op0);
9865 if (GET_MODE (operands[0]) == SFmode)
9866 emit_insn (gen_minsf3 (operands[0], op0, op1));
9868 emit_insn (gen_mindf3 (operands[0], op0, op1));
9871 /* Check for max operation. */
9872 if (code == GT || code == UNGE)
9880 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9881 if (memory_operand (op0, VOIDmode))
9882 op0 = force_reg (GET_MODE (operands[0]), op0);
9883 if (GET_MODE (operands[0]) == SFmode)
9884 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9886 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9890 /* Manage condition to be sse_comparison_operator. In case we are
9891 in non-ieee mode, try to canonicalize the destination operand
9892 to be first in the comparison - this helps reload to avoid extra
9894 if (!sse_comparison_operator (operands[1], VOIDmode)
9895 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9897 rtx tmp = ix86_compare_op0;
9898 ix86_compare_op0 = ix86_compare_op1;
9899 ix86_compare_op1 = tmp;
9900 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9901 VOIDmode, ix86_compare_op0,
9904 /* Similary try to manage result to be first operand of conditional
9905 move. We also don't support the NE comparison on SSE, so try to
9907 if ((rtx_equal_p (operands[0], operands[3])
9908 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9909 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9911 rtx tmp = operands[2];
9912 operands[2] = operands[3];
9914 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9915 (GET_CODE (operands[1])),
9916 VOIDmode, ix86_compare_op0,
9919 if (GET_MODE (operands[0]) == SFmode)
9920 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9921 operands[2], operands[3],
9922 ix86_compare_op0, ix86_compare_op1));
9924 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9925 operands[2], operands[3],
9926 ix86_compare_op0, ix86_compare_op1));
9930 /* The floating point conditional move instructions don't directly
9931 support conditions resulting from a signed integer comparison. */
9933 code = GET_CODE (operands[1]);
9934 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9936 /* The floating point conditional move instructions don't directly
9937 support signed integer comparisons. */
9939 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9941 if (second_test != NULL || bypass_test != NULL)
9943 tmp = gen_reg_rtx (QImode);
9944 ix86_expand_setcc (code, tmp);
9946 ix86_compare_op0 = tmp;
9947 ix86_compare_op1 = const0_rtx;
9948 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9950 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9952 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9953 emit_move_insn (tmp, operands[3]);
9956 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9958 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9959 emit_move_insn (tmp, operands[2]);
9963 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9964 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9969 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9970 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9975 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9976 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9984 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9985 works for floating pointer parameters and nonoffsetable memories.
9986 For pushes, it returns just stack offsets; the values will be saved
9987 in the right order. Maximally three parts are generated. */
9990 ix86_split_to_parts (operand, parts, mode)
9993 enum machine_mode mode;
9998 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10000 size = (GET_MODE_SIZE (mode) + 4) / 8;
10002 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10004 if (size < 2 || size > 3)
10007 /* Optimize constant pool reference to immediates. This is used by fp
10008 moves, that force all constants to memory to allow combining. */
10009 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10011 rtx tmp = maybe_get_pool_constant (operand);
10016 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10018 /* The only non-offsetable memories we handle are pushes. */
10019 if (! push_operand (operand, VOIDmode))
10022 operand = copy_rtx (operand);
10023 PUT_MODE (operand, Pmode);
10024 parts[0] = parts[1] = parts[2] = operand;
10026 else if (!TARGET_64BIT)
10028 if (mode == DImode)
10029 split_di (&operand, 1, &parts[0], &parts[1]);
10032 if (REG_P (operand))
10034 if (!reload_completed)
10036 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10037 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10039 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10041 else if (offsettable_memref_p (operand))
10043 operand = adjust_address (operand, SImode, 0);
10044 parts[0] = operand;
10045 parts[1] = adjust_address (operand, SImode, 4);
10047 parts[2] = adjust_address (operand, SImode, 8);
10049 else if (GET_CODE (operand) == CONST_DOUBLE)
10054 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10059 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10060 parts[2] = gen_int_mode (l[2], SImode);
10063 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10068 parts[1] = gen_int_mode (l[1], SImode);
10069 parts[0] = gen_int_mode (l[0], SImode);
10077 if (mode == TImode)
10078 split_ti (&operand, 1, &parts[0], &parts[1]);
10079 if (mode == XFmode || mode == TFmode)
10081 if (REG_P (operand))
10083 if (!reload_completed)
10085 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10086 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10088 else if (offsettable_memref_p (operand))
10090 operand = adjust_address (operand, DImode, 0);
10091 parts[0] = operand;
10092 parts[1] = adjust_address (operand, SImode, 8);
10094 else if (GET_CODE (operand) == CONST_DOUBLE)
10099 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10100 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10101 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10102 if (HOST_BITS_PER_WIDE_INT >= 64)
10105 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10106 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10109 parts[0] = immed_double_const (l[0], l[1], DImode);
10110 parts[1] = gen_int_mode (l[2], SImode);
10120 /* Emit insns to perform a move or push of DI, DF, and XF values.
10121 Return false when normal moves are needed; true when all required
10122 insns have been emitted. Operands 2-4 contain the input values
10123 int the correct order; operands 5-7 contain the output values. */
10126 ix86_split_long_move (operands)
10132 int collisions = 0;
10133 enum machine_mode mode = GET_MODE (operands[0]);
10135 /* The DFmode expanders may ask us to move double.
10136 For 64bit target this is single move. By hiding the fact
10137 here we simplify i386.md splitters. */
10138 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10140 /* Optimize constant pool reference to immediates. This is used by
10141 fp moves, that force all constants to memory to allow combining. */
10143 if (GET_CODE (operands[1]) == MEM
10144 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10145 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10146 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10147 if (push_operand (operands[0], VOIDmode))
10149 operands[0] = copy_rtx (operands[0]);
10150 PUT_MODE (operands[0], Pmode);
10153 operands[0] = gen_lowpart (DImode, operands[0]);
10154 operands[1] = gen_lowpart (DImode, operands[1]);
10155 emit_move_insn (operands[0], operands[1]);
10159 /* The only non-offsettable memory we handle is push. */
10160 if (push_operand (operands[0], VOIDmode))
10162 else if (GET_CODE (operands[0]) == MEM
10163 && ! offsettable_memref_p (operands[0]))
10166 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10167 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10169 /* When emitting push, take care for source operands on the stack. */
10170 if (push && GET_CODE (operands[1]) == MEM
10171 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10174 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10175 XEXP (part[1][2], 0));
10176 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10177 XEXP (part[1][1], 0));
10180 /* We need to do copy in the right order in case an address register
10181 of the source overlaps the destination. */
10182 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10184 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10186 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10189 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10192 /* Collision in the middle part can be handled by reordering. */
10193 if (collisions == 1 && nparts == 3
10194 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10197 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10198 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10201 /* If there are more collisions, we can't handle it by reordering.
10202 Do an lea to the last part and use only one colliding move. */
10203 else if (collisions > 1)
10206 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10207 XEXP (part[1][0], 0)));
10208 part[1][0] = change_address (part[1][0],
10209 TARGET_64BIT ? DImode : SImode,
10210 part[0][nparts - 1]);
10211 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10213 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10223 /* We use only first 12 bytes of TFmode value, but for pushing we
10224 are required to adjust stack as if we were pushing real 16byte
10226 if (mode == TFmode && !TARGET_64BIT)
10227 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10229 emit_move_insn (part[0][2], part[1][2]);
10234 /* In 64bit mode we don't have 32bit push available. In case this is
10235 register, it is OK - we will just use larger counterpart. We also
10236 retype memory - these comes from attempt to avoid REX prefix on
10237 moving of second half of TFmode value. */
10238 if (GET_MODE (part[1][1]) == SImode)
10240 if (GET_CODE (part[1][1]) == MEM)
10241 part[1][1] = adjust_address (part[1][1], DImode, 0);
10242 else if (REG_P (part[1][1]))
10243 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10246 if (GET_MODE (part[1][0]) == SImode)
10247 part[1][0] = part[1][1];
10250 emit_move_insn (part[0][1], part[1][1]);
10251 emit_move_insn (part[0][0], part[1][0]);
10255 /* Choose correct order to not overwrite the source before it is copied. */
10256 if ((REG_P (part[0][0])
10257 && REG_P (part[1][1])
10258 && (REGNO (part[0][0]) == REGNO (part[1][1])
10260 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10262 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10266 operands[2] = part[0][2];
10267 operands[3] = part[0][1];
10268 operands[4] = part[0][0];
10269 operands[5] = part[1][2];
10270 operands[6] = part[1][1];
10271 operands[7] = part[1][0];
10275 operands[2] = part[0][1];
10276 operands[3] = part[0][0];
10277 operands[5] = part[1][1];
10278 operands[6] = part[1][0];
10285 operands[2] = part[0][0];
10286 operands[3] = part[0][1];
10287 operands[4] = part[0][2];
10288 operands[5] = part[1][0];
10289 operands[6] = part[1][1];
10290 operands[7] = part[1][2];
10294 operands[2] = part[0][0];
10295 operands[3] = part[0][1];
10296 operands[5] = part[1][0];
10297 operands[6] = part[1][1];
10300 emit_move_insn (operands[2], operands[5]);
10301 emit_move_insn (operands[3], operands[6]);
10303 emit_move_insn (operands[4], operands[7]);
10309 ix86_split_ashldi (operands, scratch)
10310 rtx *operands, scratch;
10312 rtx low[2], high[2];
10315 if (GET_CODE (operands[2]) == CONST_INT)
10317 split_di (operands, 2, low, high);
10318 count = INTVAL (operands[2]) & 63;
10322 emit_move_insn (high[0], low[1]);
10323 emit_move_insn (low[0], const0_rtx);
10326 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10330 if (!rtx_equal_p (operands[0], operands[1]))
10331 emit_move_insn (operands[0], operands[1]);
10332 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10333 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10338 if (!rtx_equal_p (operands[0], operands[1]))
10339 emit_move_insn (operands[0], operands[1]);
10341 split_di (operands, 1, low, high);
10343 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10344 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10346 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10348 if (! no_new_pseudos)
10349 scratch = force_reg (SImode, const0_rtx);
10351 emit_move_insn (scratch, const0_rtx);
10353 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10357 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10362 ix86_split_ashrdi (operands, scratch)
10363 rtx *operands, scratch;
10365 rtx low[2], high[2];
10368 if (GET_CODE (operands[2]) == CONST_INT)
10370 split_di (operands, 2, low, high);
10371 count = INTVAL (operands[2]) & 63;
10375 emit_move_insn (low[0], high[1]);
10377 if (! reload_completed)
10378 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10381 emit_move_insn (high[0], low[0]);
10382 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10386 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10390 if (!rtx_equal_p (operands[0], operands[1]))
10391 emit_move_insn (operands[0], operands[1]);
10392 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10393 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10398 if (!rtx_equal_p (operands[0], operands[1]))
10399 emit_move_insn (operands[0], operands[1]);
10401 split_di (operands, 1, low, high);
10403 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10404 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10406 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10408 if (! no_new_pseudos)
10409 scratch = gen_reg_rtx (SImode);
10410 emit_move_insn (scratch, high[0]);
10411 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10412 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10416 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10421 ix86_split_lshrdi (operands, scratch)
10422 rtx *operands, scratch;
10424 rtx low[2], high[2];
10427 if (GET_CODE (operands[2]) == CONST_INT)
10429 split_di (operands, 2, low, high);
10430 count = INTVAL (operands[2]) & 63;
10434 emit_move_insn (low[0], high[1]);
10435 emit_move_insn (high[0], const0_rtx);
10438 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10442 if (!rtx_equal_p (operands[0], operands[1]))
10443 emit_move_insn (operands[0], operands[1]);
10444 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10445 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10450 if (!rtx_equal_p (operands[0], operands[1]))
10451 emit_move_insn (operands[0], operands[1]);
10453 split_di (operands, 1, low, high);
10455 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10456 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10458 /* Heh. By reversing the arguments, we can reuse this pattern. */
10459 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10461 if (! no_new_pseudos)
10462 scratch = force_reg (SImode, const0_rtx);
10464 emit_move_insn (scratch, const0_rtx);
10466 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10470 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10474 /* Helper function for the string operations below. Dest VARIABLE whether
10475 it is aligned to VALUE bytes. If true, jump to the label. */
10477 ix86_expand_aligntest (variable, value)
10481 rtx label = gen_label_rtx ();
10482 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10483 if (GET_MODE (variable) == DImode)
10484 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10486 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10487 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10492 /* Adjust COUNTER by the VALUE. */
10494 ix86_adjust_counter (countreg, value)
10496 HOST_WIDE_INT value;
10498 if (GET_MODE (countreg) == DImode)
10499 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10501 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10504 /* Zero extend possibly SImode EXP to Pmode register. */
10506 ix86_zero_extend_to_Pmode (exp)
10510 if (GET_MODE (exp) == VOIDmode)
10511 return force_reg (Pmode, exp);
10512 if (GET_MODE (exp) == Pmode)
10513 return copy_to_mode_reg (Pmode, exp);
10514 r = gen_reg_rtx (Pmode);
10515 emit_insn (gen_zero_extendsidi2 (r, exp));
10519 /* Expand string move (memcpy) operation. Use i386 string operations when
10520 profitable. expand_clrstr contains similar code. */
10522 ix86_expand_movstr (dst, src, count_exp, align_exp)
10523 rtx dst, src, count_exp, align_exp;
10525 rtx srcreg, destreg, countreg;
10526 enum machine_mode counter_mode;
10527 HOST_WIDE_INT align = 0;
10528 unsigned HOST_WIDE_INT count = 0;
10533 if (GET_CODE (align_exp) == CONST_INT)
10534 align = INTVAL (align_exp);
10536 /* This simple hack avoids all inlining code and simplifies code below. */
10537 if (!TARGET_ALIGN_STRINGOPS)
10540 if (GET_CODE (count_exp) == CONST_INT)
10541 count = INTVAL (count_exp);
10543 /* Figure out proper mode for counter. For 32bits it is always SImode,
10544 for 64bits use SImode when possible, otherwise DImode.
10545 Set count to number of bytes copied when known at compile time. */
10546 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10547 || x86_64_zero_extended_value (count_exp))
10548 counter_mode = SImode;
10550 counter_mode = DImode;
10552 if (counter_mode != SImode && counter_mode != DImode)
10555 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10556 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10558 emit_insn (gen_cld ());
10560 /* When optimizing for size emit simple rep ; movsb instruction for
10561 counts not divisible by 4. */
10563 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10565 countreg = ix86_zero_extend_to_Pmode (count_exp);
10567 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10568 destreg, srcreg, countreg));
10570 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10571 destreg, srcreg, countreg));
10574 /* For constant aligned (or small unaligned) copies use rep movsl
10575 followed by code copying the rest. For PentiumPro ensure 8 byte
10576 alignment to allow rep movsl acceleration. */
10578 else if (count != 0
10580 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10581 || optimize_size || count < (unsigned int) 64))
10583 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10584 if (count & ~(size - 1))
10586 countreg = copy_to_mode_reg (counter_mode,
10587 GEN_INT ((count >> (size == 4 ? 2 : 3))
10588 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10589 countreg = ix86_zero_extend_to_Pmode (countreg);
10593 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10594 destreg, srcreg, countreg));
10596 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10597 destreg, srcreg, countreg));
10600 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10601 destreg, srcreg, countreg));
10603 if (size == 8 && (count & 0x04))
10604 emit_insn (gen_strmovsi (destreg, srcreg));
10606 emit_insn (gen_strmovhi (destreg, srcreg));
10608 emit_insn (gen_strmovqi (destreg, srcreg));
10610 /* The generic code based on the glibc implementation:
10611 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10612 allowing accelerated copying there)
10613 - copy the data using rep movsl
10614 - copy the rest. */
10619 int desired_alignment = (TARGET_PENTIUMPRO
10620 && (count == 0 || count >= (unsigned int) 260)
10621 ? 8 : UNITS_PER_WORD);
10623 /* In case we don't know anything about the alignment, default to
10624 library version, since it is usually equally fast and result in
10627 Also emit call when we know that the count is large and call overhead
10628 will not be important. */
10629 if (!TARGET_INLINE_ALL_STRINGOPS
10630 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10636 if (TARGET_SINGLE_STRINGOP)
10637 emit_insn (gen_cld ());
10639 countreg2 = gen_reg_rtx (Pmode);
10640 countreg = copy_to_mode_reg (counter_mode, count_exp);
10642 /* We don't use loops to align destination and to copy parts smaller
10643 than 4 bytes, because gcc is able to optimize such code better (in
10644 the case the destination or the count really is aligned, gcc is often
10645 able to predict the branches) and also it is friendlier to the
10646 hardware branch prediction.
10648 Using loops is benefical for generic case, because we can
10649 handle small counts using the loops. Many CPUs (such as Athlon)
10650 have large REP prefix setup costs.
10652 This is quite costy. Maybe we can revisit this decision later or
10653 add some customizability to this code. */
10655 if (count == 0 && align < desired_alignment)
10657 label = gen_label_rtx ();
10658 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10659 LEU, 0, counter_mode, 1, label);
10663 rtx label = ix86_expand_aligntest (destreg, 1);
10664 emit_insn (gen_strmovqi (destreg, srcreg));
10665 ix86_adjust_counter (countreg, 1);
10666 emit_label (label);
10667 LABEL_NUSES (label) = 1;
10671 rtx label = ix86_expand_aligntest (destreg, 2);
10672 emit_insn (gen_strmovhi (destreg, srcreg));
10673 ix86_adjust_counter (countreg, 2);
10674 emit_label (label);
10675 LABEL_NUSES (label) = 1;
10677 if (align <= 4 && desired_alignment > 4)
10679 rtx label = ix86_expand_aligntest (destreg, 4);
10680 emit_insn (gen_strmovsi (destreg, srcreg));
10681 ix86_adjust_counter (countreg, 4);
10682 emit_label (label);
10683 LABEL_NUSES (label) = 1;
10686 if (label && desired_alignment > 4 && !TARGET_64BIT)
10688 emit_label (label);
10689 LABEL_NUSES (label) = 1;
10692 if (!TARGET_SINGLE_STRINGOP)
10693 emit_insn (gen_cld ());
10696 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10698 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10699 destreg, srcreg, countreg2));
10703 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10704 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10705 destreg, srcreg, countreg2));
10710 emit_label (label);
10711 LABEL_NUSES (label) = 1;
10713 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10714 emit_insn (gen_strmovsi (destreg, srcreg));
10715 if ((align <= 4 || count == 0) && TARGET_64BIT)
10717 rtx label = ix86_expand_aligntest (countreg, 4);
10718 emit_insn (gen_strmovsi (destreg, srcreg));
10719 emit_label (label);
10720 LABEL_NUSES (label) = 1;
10722 if (align > 2 && count != 0 && (count & 2))
10723 emit_insn (gen_strmovhi (destreg, srcreg));
10724 if (align <= 2 || count == 0)
10726 rtx label = ix86_expand_aligntest (countreg, 2);
10727 emit_insn (gen_strmovhi (destreg, srcreg));
10728 emit_label (label);
10729 LABEL_NUSES (label) = 1;
10731 if (align > 1 && count != 0 && (count & 1))
10732 emit_insn (gen_strmovqi (destreg, srcreg));
10733 if (align <= 1 || count == 0)
10735 rtx label = ix86_expand_aligntest (countreg, 1);
10736 emit_insn (gen_strmovqi (destreg, srcreg));
10737 emit_label (label);
10738 LABEL_NUSES (label) = 1;
10742 insns = get_insns ();
10745 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10750 /* Expand string clear operation (bzero). Use i386 string operations when
10751 profitable. expand_movstr contains similar code. */
10753 ix86_expand_clrstr (src, count_exp, align_exp)
10754 rtx src, count_exp, align_exp;
10756 rtx destreg, zeroreg, countreg;
10757 enum machine_mode counter_mode;
10758 HOST_WIDE_INT align = 0;
10759 unsigned HOST_WIDE_INT count = 0;
10761 if (GET_CODE (align_exp) == CONST_INT)
10762 align = INTVAL (align_exp);
10764 /* This simple hack avoids all inlining code and simplifies code below. */
10765 if (!TARGET_ALIGN_STRINGOPS)
10768 if (GET_CODE (count_exp) == CONST_INT)
10769 count = INTVAL (count_exp);
10770 /* Figure out proper mode for counter. For 32bits it is always SImode,
10771 for 64bits use SImode when possible, otherwise DImode.
10772 Set count to number of bytes copied when known at compile time. */
10773 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10774 || x86_64_zero_extended_value (count_exp))
10775 counter_mode = SImode;
10777 counter_mode = DImode;
10779 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10781 emit_insn (gen_cld ());
10783 /* When optimizing for size emit simple rep ; movsb instruction for
10784 counts not divisible by 4. */
10786 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10788 countreg = ix86_zero_extend_to_Pmode (count_exp);
10789 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10791 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10792 destreg, countreg));
10794 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10795 destreg, countreg));
10797 else if (count != 0
10799 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10800 || optimize_size || count < (unsigned int) 64))
10802 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10803 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10804 if (count & ~(size - 1))
10806 countreg = copy_to_mode_reg (counter_mode,
10807 GEN_INT ((count >> (size == 4 ? 2 : 3))
10808 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10809 countreg = ix86_zero_extend_to_Pmode (countreg);
10813 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10814 destreg, countreg));
10816 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10817 destreg, countreg));
10820 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10821 destreg, countreg));
10823 if (size == 8 && (count & 0x04))
10824 emit_insn (gen_strsetsi (destreg,
10825 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10827 emit_insn (gen_strsethi (destreg,
10828 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10830 emit_insn (gen_strsetqi (destreg,
10831 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10837 /* Compute desired alignment of the string operation. */
10838 int desired_alignment = (TARGET_PENTIUMPRO
10839 && (count == 0 || count >= (unsigned int) 260)
10840 ? 8 : UNITS_PER_WORD);
10842 /* In case we don't know anything about the alignment, default to
10843 library version, since it is usually equally fast and result in
10846 Also emit call when we know that the count is large and call overhead
10847 will not be important. */
10848 if (!TARGET_INLINE_ALL_STRINGOPS
10849 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10852 if (TARGET_SINGLE_STRINGOP)
10853 emit_insn (gen_cld ());
10855 countreg2 = gen_reg_rtx (Pmode);
10856 countreg = copy_to_mode_reg (counter_mode, count_exp);
10857 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10859 if (count == 0 && align < desired_alignment)
10861 label = gen_label_rtx ();
10862 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10863 LEU, 0, counter_mode, 1, label);
10867 rtx label = ix86_expand_aligntest (destreg, 1);
10868 emit_insn (gen_strsetqi (destreg,
10869 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10870 ix86_adjust_counter (countreg, 1);
10871 emit_label (label);
10872 LABEL_NUSES (label) = 1;
10876 rtx label = ix86_expand_aligntest (destreg, 2);
10877 emit_insn (gen_strsethi (destreg,
10878 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10879 ix86_adjust_counter (countreg, 2);
10880 emit_label (label);
10881 LABEL_NUSES (label) = 1;
10883 if (align <= 4 && desired_alignment > 4)
10885 rtx label = ix86_expand_aligntest (destreg, 4);
10886 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10887 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10889 ix86_adjust_counter (countreg, 4);
10890 emit_label (label);
10891 LABEL_NUSES (label) = 1;
10894 if (label && desired_alignment > 4 && !TARGET_64BIT)
10896 emit_label (label);
10897 LABEL_NUSES (label) = 1;
10901 if (!TARGET_SINGLE_STRINGOP)
10902 emit_insn (gen_cld ());
10905 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10907 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10908 destreg, countreg2));
10912 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10913 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10914 destreg, countreg2));
10918 emit_label (label);
10919 LABEL_NUSES (label) = 1;
10922 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10923 emit_insn (gen_strsetsi (destreg,
10924 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10925 if (TARGET_64BIT && (align <= 4 || count == 0))
10927 rtx label = ix86_expand_aligntest (countreg, 4);
10928 emit_insn (gen_strsetsi (destreg,
10929 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10930 emit_label (label);
10931 LABEL_NUSES (label) = 1;
10933 if (align > 2 && count != 0 && (count & 2))
10934 emit_insn (gen_strsethi (destreg,
10935 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10936 if (align <= 2 || count == 0)
10938 rtx label = ix86_expand_aligntest (countreg, 2);
10939 emit_insn (gen_strsethi (destreg,
10940 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10941 emit_label (label);
10942 LABEL_NUSES (label) = 1;
10944 if (align > 1 && count != 0 && (count & 1))
10945 emit_insn (gen_strsetqi (destreg,
10946 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10947 if (align <= 1 || count == 0)
10949 rtx label = ix86_expand_aligntest (countreg, 1);
10950 emit_insn (gen_strsetqi (destreg,
10951 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10952 emit_label (label);
10953 LABEL_NUSES (label) = 1;
10958 /* Expand strlen. */
10960 ix86_expand_strlen (out, src, eoschar, align)
10961 rtx out, src, eoschar, align;
10963 rtx addr, scratch1, scratch2, scratch3, scratch4;
10965 /* The generic case of strlen expander is long. Avoid it's
10966 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10968 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10969 && !TARGET_INLINE_ALL_STRINGOPS
10971 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10974 addr = force_reg (Pmode, XEXP (src, 0));
10975 scratch1 = gen_reg_rtx (Pmode);
10977 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10980 /* Well it seems that some optimizer does not combine a call like
10981 foo(strlen(bar), strlen(bar));
10982 when the move and the subtraction is done here. It does calculate
10983 the length just once when these instructions are done inside of
10984 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10985 often used and I use one fewer register for the lifetime of
10986 output_strlen_unroll() this is better. */
10988 emit_move_insn (out, addr);
10990 ix86_expand_strlensi_unroll_1 (out, align);
10992 /* strlensi_unroll_1 returns the address of the zero at the end of
10993 the string, like memchr(), so compute the length by subtracting
10994 the start address. */
10996 emit_insn (gen_subdi3 (out, out, addr));
10998 emit_insn (gen_subsi3 (out, out, addr));
11002 scratch2 = gen_reg_rtx (Pmode);
11003 scratch3 = gen_reg_rtx (Pmode);
11004 scratch4 = force_reg (Pmode, constm1_rtx);
11006 emit_move_insn (scratch3, addr);
11007 eoschar = force_reg (QImode, eoschar);
11009 emit_insn (gen_cld ());
11012 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11013 align, scratch4, scratch3));
11014 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11015 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11019 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11020 align, scratch4, scratch3));
11021 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11022 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11028 /* Expand the appropriate insns for doing strlen if not just doing
11031 out = result, initialized with the start address
11032 align_rtx = alignment of the address.
11033 scratch = scratch register, initialized with the startaddress when
11034 not aligned, otherwise undefined
11036 This is just the body. It needs the initialisations mentioned above and
11037 some address computing at the end. These things are done in i386.md. */
11040 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11041 rtx out, align_rtx;
11045 rtx align_2_label = NULL_RTX;
11046 rtx align_3_label = NULL_RTX;
11047 rtx align_4_label = gen_label_rtx ();
11048 rtx end_0_label = gen_label_rtx ();
11050 rtx tmpreg = gen_reg_rtx (SImode);
11051 rtx scratch = gen_reg_rtx (SImode);
11054 if (GET_CODE (align_rtx) == CONST_INT)
11055 align = INTVAL (align_rtx);
11057 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11059 /* Is there a known alignment and is it less than 4? */
11062 rtx scratch1 = gen_reg_rtx (Pmode);
11063 emit_move_insn (scratch1, out);
11064 /* Is there a known alignment and is it not 2? */
11067 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11068 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11070 /* Leave just the 3 lower bits. */
11071 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11072 NULL_RTX, 0, OPTAB_WIDEN);
11074 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11075 Pmode, 1, align_4_label);
11076 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11077 Pmode, 1, align_2_label);
11078 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11079 Pmode, 1, align_3_label);
11083 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11084 check if is aligned to 4 - byte. */
11086 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11087 NULL_RTX, 0, OPTAB_WIDEN);
11089 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11090 Pmode, 1, align_4_label);
11093 mem = gen_rtx_MEM (QImode, out);
11095 /* Now compare the bytes. */
11097 /* Compare the first n unaligned byte on a byte per byte basis. */
11098 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11099 QImode, 1, end_0_label);
11101 /* Increment the address. */
11103 emit_insn (gen_adddi3 (out, out, const1_rtx));
11105 emit_insn (gen_addsi3 (out, out, const1_rtx));
11107 /* Not needed with an alignment of 2 */
11110 emit_label (align_2_label);
11112 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11116 emit_insn (gen_adddi3 (out, out, const1_rtx));
11118 emit_insn (gen_addsi3 (out, out, const1_rtx));
11120 emit_label (align_3_label);
11123 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11127 emit_insn (gen_adddi3 (out, out, const1_rtx));
11129 emit_insn (gen_addsi3 (out, out, const1_rtx));
11132 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11133 align this loop. It gives only huge programs, but does not help to
11135 emit_label (align_4_label);
11137 mem = gen_rtx_MEM (SImode, out);
11138 emit_move_insn (scratch, mem);
11140 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11142 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11144 /* This formula yields a nonzero result iff one of the bytes is zero.
11145 This saves three branches inside loop and many cycles. */
11147 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11148 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11149 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11150 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11151 gen_int_mode (0x80808080, SImode)));
11152 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11157 rtx reg = gen_reg_rtx (SImode);
11158 rtx reg2 = gen_reg_rtx (Pmode);
11159 emit_move_insn (reg, tmpreg);
11160 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11162 /* If zero is not in the first two bytes, move two bytes forward. */
11163 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11164 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11165 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11166 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11167 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11170 /* Emit lea manually to avoid clobbering of flags. */
11171 emit_insn (gen_rtx_SET (SImode, reg2,
11172 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11174 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11175 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11176 emit_insn (gen_rtx_SET (VOIDmode, out,
11177 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11184 rtx end_2_label = gen_label_rtx ();
11185 /* Is zero in the first two bytes? */
11187 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11188 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11189 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11190 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11191 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11193 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11194 JUMP_LABEL (tmp) = end_2_label;
11196 /* Not in the first two. Move two bytes forward. */
11197 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11199 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11201 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11203 emit_label (end_2_label);
11207 /* Avoid branch in fixing the byte. */
11208 tmpreg = gen_lowpart (QImode, tmpreg);
11209 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11211 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11213 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11215 emit_label (end_0_label);
11219 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11220 rtx retval, fnaddr, callarg1, callarg2, pop;
11223 rtx use = NULL, call;
11225 if (pop == const0_rtx)
11227 if (TARGET_64BIT && pop)
11231 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11232 fnaddr = machopic_indirect_call_target (fnaddr);
11234 /* Static functions and indirect calls don't need the pic register. */
11235 if (! TARGET_64BIT && flag_pic
11236 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11237 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11238 use_reg (&use, pic_offset_table_rtx);
11240 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11242 rtx al = gen_rtx_REG (QImode, 0);
11243 emit_move_insn (al, callarg2);
11244 use_reg (&use, al);
11246 #endif /* TARGET_MACHO */
11248 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11250 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11251 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11253 if (sibcall && TARGET_64BIT
11254 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11257 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11258 fnaddr = gen_rtx_REG (Pmode, 40);
11259 emit_move_insn (fnaddr, addr);
11260 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11263 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11265 call = gen_rtx_SET (VOIDmode, retval, call);
11268 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11269 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11270 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11273 call = emit_call_insn (call);
11275 CALL_INSN_FUNCTION_USAGE (call) = use;
11279 /* Clear stack slot assignments remembered from previous functions.
11280 This is called from INIT_EXPANDERS once before RTL is emitted for each
11283 static struct machine_function *
11284 ix86_init_machine_status ()
11286 return ggc_alloc_cleared (sizeof (struct machine_function));
11289 /* Return a MEM corresponding to a stack slot with mode MODE.
11290 Allocate a new slot if necessary.
11292 The RTL for a function can have several slots available: N is
11293 which slot to use. */
11296 assign_386_stack_local (mode, n)
11297 enum machine_mode mode;
11300 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11303 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11304 ix86_stack_locals[(int) mode][n]
11305 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11307 return ix86_stack_locals[(int) mode][n];
11310 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11312 static GTY(()) rtx ix86_tls_symbol;
11314 ix86_tls_get_addr ()
11317 if (!ix86_tls_symbol)
11319 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11320 (TARGET_GNU_TLS && !TARGET_64BIT)
11321 ? "___tls_get_addr"
11322 : "__tls_get_addr");
11325 return ix86_tls_symbol;
11328 /* Calculate the length of the memory address in the instruction
11329 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11332 memory_address_length (addr)
11335 struct ix86_address parts;
11336 rtx base, index, disp;
11339 if (GET_CODE (addr) == PRE_DEC
11340 || GET_CODE (addr) == POST_INC
11341 || GET_CODE (addr) == PRE_MODIFY
11342 || GET_CODE (addr) == POST_MODIFY)
11345 if (! ix86_decompose_address (addr, &parts))
11349 index = parts.index;
11353 /* Register Indirect. */
11354 if (base && !index && !disp)
11356 /* Special cases: ebp and esp need the two-byte modrm form. */
11357 if (addr == stack_pointer_rtx
11358 || addr == arg_pointer_rtx
11359 || addr == frame_pointer_rtx
11360 || addr == hard_frame_pointer_rtx)
11364 /* Direct Addressing. */
11365 else if (disp && !base && !index)
11370 /* Find the length of the displacement constant. */
11373 if (GET_CODE (disp) == CONST_INT
11374 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11380 /* An index requires the two-byte modrm form. */
11388 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11389 is set, expect that insn have 8bit immediate alternative. */
11391 ix86_attr_length_immediate_default (insn, shortform)
11397 extract_insn_cached (insn);
11398 for (i = recog_data.n_operands - 1; i >= 0; --i)
11399 if (CONSTANT_P (recog_data.operand[i]))
11404 && GET_CODE (recog_data.operand[i]) == CONST_INT
11405 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11409 switch (get_attr_mode (insn))
11420 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11425 fatal_insn ("unknown insn mode", insn);
11431 /* Compute default value for "length_address" attribute. */
11433 ix86_attr_length_address_default (insn)
11437 extract_insn_cached (insn);
11438 for (i = recog_data.n_operands - 1; i >= 0; --i)
11439 if (GET_CODE (recog_data.operand[i]) == MEM)
11441 return memory_address_length (XEXP (recog_data.operand[i], 0));
11447 /* Return the maximum number of instructions a cpu can issue. */
11454 case PROCESSOR_PENTIUM:
11458 case PROCESSOR_PENTIUMPRO:
11459 case PROCESSOR_PENTIUM4:
11460 case PROCESSOR_ATHLON:
11469 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11470 by DEP_INSN and nothing set by DEP_INSN. */
11473 ix86_flags_dependant (insn, dep_insn, insn_type)
11474 rtx insn, dep_insn;
11475 enum attr_type insn_type;
11479 /* Simplify the test for uninteresting insns. */
11480 if (insn_type != TYPE_SETCC
11481 && insn_type != TYPE_ICMOV
11482 && insn_type != TYPE_FCMOV
11483 && insn_type != TYPE_IBR)
11486 if ((set = single_set (dep_insn)) != 0)
11488 set = SET_DEST (set);
11491 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11492 && XVECLEN (PATTERN (dep_insn), 0) == 2
11493 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11494 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11496 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11497 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11502 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11505 /* This test is true if the dependent insn reads the flags but
11506 not any other potentially set register. */
11507 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11510 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11516 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11517 address with operands set by DEP_INSN. */
11520 ix86_agi_dependant (insn, dep_insn, insn_type)
11521 rtx insn, dep_insn;
11522 enum attr_type insn_type;
11526 if (insn_type == TYPE_LEA
11529 addr = PATTERN (insn);
11530 if (GET_CODE (addr) == SET)
11532 else if (GET_CODE (addr) == PARALLEL
11533 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11534 addr = XVECEXP (addr, 0, 0);
11537 addr = SET_SRC (addr);
11542 extract_insn_cached (insn);
11543 for (i = recog_data.n_operands - 1; i >= 0; --i)
11544 if (GET_CODE (recog_data.operand[i]) == MEM)
11546 addr = XEXP (recog_data.operand[i], 0);
11553 return modified_in_p (addr, dep_insn);
11557 ix86_adjust_cost (insn, link, dep_insn, cost)
11558 rtx insn, link, dep_insn;
11561 enum attr_type insn_type, dep_insn_type;
11562 enum attr_memory memory, dep_memory;
11564 int dep_insn_code_number;
11566 /* Anti and output depenancies have zero cost on all CPUs. */
11567 if (REG_NOTE_KIND (link) != 0)
11570 dep_insn_code_number = recog_memoized (dep_insn);
11572 /* If we can't recognize the insns, we can't really do anything. */
11573 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11576 insn_type = get_attr_type (insn);
11577 dep_insn_type = get_attr_type (dep_insn);
11581 case PROCESSOR_PENTIUM:
11582 /* Address Generation Interlock adds a cycle of latency. */
11583 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11586 /* ??? Compares pair with jump/setcc. */
11587 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11590 /* Floating point stores require value to be ready one cycle ealier. */
11591 if (insn_type == TYPE_FMOV
11592 && get_attr_memory (insn) == MEMORY_STORE
11593 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11597 case PROCESSOR_PENTIUMPRO:
11598 memory = get_attr_memory (insn);
11599 dep_memory = get_attr_memory (dep_insn);
11601 /* Since we can't represent delayed latencies of load+operation,
11602 increase the cost here for non-imov insns. */
11603 if (dep_insn_type != TYPE_IMOV
11604 && dep_insn_type != TYPE_FMOV
11605 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11608 /* INT->FP conversion is expensive. */
11609 if (get_attr_fp_int_src (dep_insn))
11612 /* There is one cycle extra latency between an FP op and a store. */
11613 if (insn_type == TYPE_FMOV
11614 && (set = single_set (dep_insn)) != NULL_RTX
11615 && (set2 = single_set (insn)) != NULL_RTX
11616 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11617 && GET_CODE (SET_DEST (set2)) == MEM)
11620 /* Show ability of reorder buffer to hide latency of load by executing
11621 in parallel with previous instruction in case
11622 previous instruction is not needed to compute the address. */
11623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11624 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11626 /* Claim moves to take one cycle, as core can issue one load
11627 at time and the next load can start cycle later. */
11628 if (dep_insn_type == TYPE_IMOV
11629 || dep_insn_type == TYPE_FMOV)
11637 memory = get_attr_memory (insn);
11638 dep_memory = get_attr_memory (dep_insn);
11639 /* The esp dependency is resolved before the instruction is really
11641 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11642 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11645 /* Since we can't represent delayed latencies of load+operation,
11646 increase the cost here for non-imov insns. */
11647 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11648 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11650 /* INT->FP conversion is expensive. */
11651 if (get_attr_fp_int_src (dep_insn))
11654 /* Show ability of reorder buffer to hide latency of load by executing
11655 in parallel with previous instruction in case
11656 previous instruction is not needed to compute the address. */
11657 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11658 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11660 /* Claim moves to take one cycle, as core can issue one load
11661 at time and the next load can start cycle later. */
11662 if (dep_insn_type == TYPE_IMOV
11663 || dep_insn_type == TYPE_FMOV)
11672 case PROCESSOR_ATHLON:
11674 memory = get_attr_memory (insn);
11675 dep_memory = get_attr_memory (dep_insn);
11677 /* Show ability of reorder buffer to hide latency of load by executing
11678 in parallel with previous instruction in case
11679 previous instruction is not needed to compute the address. */
11680 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11681 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11683 /* Claim moves to take one cycle, as core can issue one load
11684 at time and the next load can start cycle later. */
11685 if (dep_insn_type == TYPE_IMOV
11686 || dep_insn_type == TYPE_FMOV)
11688 else if (cost >= 3)
11703 struct ppro_sched_data
11706 int issued_this_cycle;
11710 static enum attr_ppro_uops
11711 ix86_safe_ppro_uops (insn)
11714 if (recog_memoized (insn) >= 0)
11715 return get_attr_ppro_uops (insn);
11717 return PPRO_UOPS_MANY;
11721 ix86_dump_ppro_packet (dump)
11724 if (ix86_sched_data.ppro.decode[0])
11726 fprintf (dump, "PPRO packet: %d",
11727 INSN_UID (ix86_sched_data.ppro.decode[0]));
11728 if (ix86_sched_data.ppro.decode[1])
11729 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11730 if (ix86_sched_data.ppro.decode[2])
11731 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11732 fputc ('\n', dump);
11736 /* We're beginning a new block. Initialize data structures as necessary. */
11739 ix86_sched_init (dump, sched_verbose, veclen)
11740 FILE *dump ATTRIBUTE_UNUSED;
11741 int sched_verbose ATTRIBUTE_UNUSED;
11742 int veclen ATTRIBUTE_UNUSED;
11744 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11747 /* Shift INSN to SLOT, and shift everything else down. */
11750 ix86_reorder_insn (insnp, slot)
11757 insnp[0] = insnp[1];
11758 while (++insnp != slot);
11764 ix86_sched_reorder_ppro (ready, e_ready)
11769 enum attr_ppro_uops cur_uops;
11770 int issued_this_cycle;
11774 /* At this point .ppro.decode contains the state of the three
11775 decoders from last "cycle". That is, those insns that were
11776 actually independent. But here we're scheduling for the
11777 decoder, and we may find things that are decodable in the
11780 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11781 issued_this_cycle = 0;
11784 cur_uops = ix86_safe_ppro_uops (*insnp);
11786 /* If the decoders are empty, and we've a complex insn at the
11787 head of the priority queue, let it issue without complaint. */
11788 if (decode[0] == NULL)
11790 if (cur_uops == PPRO_UOPS_MANY)
11792 decode[0] = *insnp;
11796 /* Otherwise, search for a 2-4 uop unsn to issue. */
11797 while (cur_uops != PPRO_UOPS_FEW)
11799 if (insnp == ready)
11801 cur_uops = ix86_safe_ppro_uops (*--insnp);
11804 /* If so, move it to the head of the line. */
11805 if (cur_uops == PPRO_UOPS_FEW)
11806 ix86_reorder_insn (insnp, e_ready);
11808 /* Issue the head of the queue. */
11809 issued_this_cycle = 1;
11810 decode[0] = *e_ready--;
11813 /* Look for simple insns to fill in the other two slots. */
11814 for (i = 1; i < 3; ++i)
11815 if (decode[i] == NULL)
11817 if (ready > e_ready)
11821 cur_uops = ix86_safe_ppro_uops (*insnp);
11822 while (cur_uops != PPRO_UOPS_ONE)
11824 if (insnp == ready)
11826 cur_uops = ix86_safe_ppro_uops (*--insnp);
11829 /* Found one. Move it to the head of the queue and issue it. */
11830 if (cur_uops == PPRO_UOPS_ONE)
11832 ix86_reorder_insn (insnp, e_ready);
11833 decode[i] = *e_ready--;
11834 issued_this_cycle++;
11838 /* ??? Didn't find one. Ideally, here we would do a lazy split
11839 of 2-uop insns, issue one and queue the other. */
11843 if (issued_this_cycle == 0)
11844 issued_this_cycle = 1;
11845 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11848 /* We are about to being issuing insns for this clock cycle.
11849 Override the default sort algorithm to better slot instructions. */
11851 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11852 FILE *dump ATTRIBUTE_UNUSED;
11853 int sched_verbose ATTRIBUTE_UNUSED;
11856 int clock_var ATTRIBUTE_UNUSED;
11858 int n_ready = *n_readyp;
11859 rtx *e_ready = ready + n_ready - 1;
11861 /* Make sure to go ahead and initialize key items in
11862 ix86_sched_data if we are not going to bother trying to
11863 reorder the ready queue. */
11866 ix86_sched_data.ppro.issued_this_cycle = 1;
11875 case PROCESSOR_PENTIUMPRO:
11876 ix86_sched_reorder_ppro (ready, e_ready);
11881 return ix86_issue_rate ();
11884 /* We are about to issue INSN. Return the number of insns left on the
11885 ready queue that can be issued this cycle. */
11888 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11892 int can_issue_more;
11898 return can_issue_more - 1;
11900 case PROCESSOR_PENTIUMPRO:
11902 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11904 if (uops == PPRO_UOPS_MANY)
11907 ix86_dump_ppro_packet (dump);
11908 ix86_sched_data.ppro.decode[0] = insn;
11909 ix86_sched_data.ppro.decode[1] = NULL;
11910 ix86_sched_data.ppro.decode[2] = NULL;
11912 ix86_dump_ppro_packet (dump);
11913 ix86_sched_data.ppro.decode[0] = NULL;
11915 else if (uops == PPRO_UOPS_FEW)
11918 ix86_dump_ppro_packet (dump);
11919 ix86_sched_data.ppro.decode[0] = insn;
11920 ix86_sched_data.ppro.decode[1] = NULL;
11921 ix86_sched_data.ppro.decode[2] = NULL;
11925 for (i = 0; i < 3; ++i)
11926 if (ix86_sched_data.ppro.decode[i] == NULL)
11928 ix86_sched_data.ppro.decode[i] = insn;
11936 ix86_dump_ppro_packet (dump);
11937 ix86_sched_data.ppro.decode[0] = NULL;
11938 ix86_sched_data.ppro.decode[1] = NULL;
11939 ix86_sched_data.ppro.decode[2] = NULL;
11943 return --ix86_sched_data.ppro.issued_this_cycle;
11948 ia32_use_dfa_pipeline_interface ()
11950 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
11955 /* How many alternative schedules to try. This should be as wide as the
11956 scheduling freedom in the DFA, but no wider. Making this value too
11957 large results extra work for the scheduler. */
11960 ia32_multipass_dfa_lookahead ()
11962 if (ix86_cpu == PROCESSOR_PENTIUM)
11969 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11970 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11974 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11976 rtx dstref, srcref, dstreg, srcreg;
11980 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11982 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11986 /* Subroutine of above to actually do the updating by recursively walking
11990 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11992 rtx dstref, srcref, dstreg, srcreg;
11994 enum rtx_code code = GET_CODE (x);
11995 const char *format_ptr = GET_RTX_FORMAT (code);
11998 if (code == MEM && XEXP (x, 0) == dstreg)
11999 MEM_COPY_ATTRIBUTES (x, dstref);
12000 else if (code == MEM && XEXP (x, 0) == srcreg)
12001 MEM_COPY_ATTRIBUTES (x, srcref);
12003 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12005 if (*format_ptr == 'e')
12006 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12008 else if (*format_ptr == 'E')
12009 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12010 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12015 /* Compute the alignment given to a constant that is being placed in memory.
12016 EXP is the constant and ALIGN is the alignment that the object would
12018 The value of this function is used instead of that alignment to align
12022 ix86_constant_alignment (exp, align)
12026 if (TREE_CODE (exp) == REAL_CST)
12028 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12030 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12033 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12040 /* Compute the alignment for a static variable.
12041 TYPE is the data type, and ALIGN is the alignment that
12042 the object would ordinarily have. The value of this function is used
12043 instead of that alignment to align the object. */
12046 ix86_data_alignment (type, align)
12050 if (AGGREGATE_TYPE_P (type)
12051 && TYPE_SIZE (type)
12052 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12053 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12054 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12057 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12058 to 16byte boundary. */
12061 if (AGGREGATE_TYPE_P (type)
12062 && TYPE_SIZE (type)
12063 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12064 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12065 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12069 if (TREE_CODE (type) == ARRAY_TYPE)
12071 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12073 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12076 else if (TREE_CODE (type) == COMPLEX_TYPE)
12079 if (TYPE_MODE (type) == DCmode && align < 64)
12081 if (TYPE_MODE (type) == XCmode && align < 128)
12084 else if ((TREE_CODE (type) == RECORD_TYPE
12085 || TREE_CODE (type) == UNION_TYPE
12086 || TREE_CODE (type) == QUAL_UNION_TYPE)
12087 && TYPE_FIELDS (type))
12089 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12091 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12094 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12095 || TREE_CODE (type) == INTEGER_TYPE)
12097 if (TYPE_MODE (type) == DFmode && align < 64)
12099 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12106 /* Compute the alignment for a local variable.
12107 TYPE is the data type, and ALIGN is the alignment that
12108 the object would ordinarily have. The value of this macro is used
12109 instead of that alignment to align the object. */
12112 ix86_local_alignment (type, align)
12116 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12117 to 16byte boundary. */
12120 if (AGGREGATE_TYPE_P (type)
12121 && TYPE_SIZE (type)
12122 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12123 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12124 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12127 if (TREE_CODE (type) == ARRAY_TYPE)
12129 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12131 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12134 else if (TREE_CODE (type) == COMPLEX_TYPE)
12136 if (TYPE_MODE (type) == DCmode && align < 64)
12138 if (TYPE_MODE (type) == XCmode && align < 128)
12141 else if ((TREE_CODE (type) == RECORD_TYPE
12142 || TREE_CODE (type) == UNION_TYPE
12143 || TREE_CODE (type) == QUAL_UNION_TYPE)
12144 && TYPE_FIELDS (type))
12146 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12148 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12151 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12152 || TREE_CODE (type) == INTEGER_TYPE)
12155 if (TYPE_MODE (type) == DFmode && align < 64)
12157 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12163 /* Emit RTL insns to initialize the variable parts of a trampoline.
12164 FNADDR is an RTX for the address of the function's pure code.
12165 CXT is an RTX for the static chain value for the function. */
12167 x86_initialize_trampoline (tramp, fnaddr, cxt)
12168 rtx tramp, fnaddr, cxt;
12172 /* Compute offset from the end of the jmp to the target function. */
12173 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12174 plus_constant (tramp, 10),
12175 NULL_RTX, 1, OPTAB_DIRECT);
12176 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12177 gen_int_mode (0xb9, QImode));
12178 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12179 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12180 gen_int_mode (0xe9, QImode));
12181 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12186 /* Try to load address using shorter movl instead of movabs.
12187 We may want to support movq for kernel mode, but kernel does not use
12188 trampolines at the moment. */
12189 if (x86_64_zero_extended_value (fnaddr))
12191 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12192 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12193 gen_int_mode (0xbb41, HImode));
12194 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12195 gen_lowpart (SImode, fnaddr));
12200 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12201 gen_int_mode (0xbb49, HImode));
12202 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12206 /* Load static chain using movabs to r10. */
12207 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12208 gen_int_mode (0xba49, HImode));
12209 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12212 /* Jump to the r11 */
12213 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12214 gen_int_mode (0xff49, HImode));
12215 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12216 gen_int_mode (0xe3, QImode));
12218 if (offset > TRAMPOLINE_SIZE)
12222 #ifdef TRANSFER_FROM_TRAMPOLINE
12223 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12224 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12228 #define def_builtin(MASK, NAME, TYPE, CODE) \
12230 if ((MASK) & target_flags) \
12231 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12232 NULL, NULL_TREE); \
12235 struct builtin_description
12237 const unsigned int mask;
12238 const enum insn_code icode;
12239 const char *const name;
12240 const enum ix86_builtins code;
12241 const enum rtx_code comparison;
12242 const unsigned int flag;
12245 /* Used for builtins that are enabled both by -msse and -msse2. */
12246 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12248 static const struct builtin_description bdesc_comi[] =
12250 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12251 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12252 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12253 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12254 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12255 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12256 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12257 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12258 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12259 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12260 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12261 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12262 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12263 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12264 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12265 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12266 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12267 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12268 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12269 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12270 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12271 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12272 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12273 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12276 static const struct builtin_description bdesc_2arg[] =
12279 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12280 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12281 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12282 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12283 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12284 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12285 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12286 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12288 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12289 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12290 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12291 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12292 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12293 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12294 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12295 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12296 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12297 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12298 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12299 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12300 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12301 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12302 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12303 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12304 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12305 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12306 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12307 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12309 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12310 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12311 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12312 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12314 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12315 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12316 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12317 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12319 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12320 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12321 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12322 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12323 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12326 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12327 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12328 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12329 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12330 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12331 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12333 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12334 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12335 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12336 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12337 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12338 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12339 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12340 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12342 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12343 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12344 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12346 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12347 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12348 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12349 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12351 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12352 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12354 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12355 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12356 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12357 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12358 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12359 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12361 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12362 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12363 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12364 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12366 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12367 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12368 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12369 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12370 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12371 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12374 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12375 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12376 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12378 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12379 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12381 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12382 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12383 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12384 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12385 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12386 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12388 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12389 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12390 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12391 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12392 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12393 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12395 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12396 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12397 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12398 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12400 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12401 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12404 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12405 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12406 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12407 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12408 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12409 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12410 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12411 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12413 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12414 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12415 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12416 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12417 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12418 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12419 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12420 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12421 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12422 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12423 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12424 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12425 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12426 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12427 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12428 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12429 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12430 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12431 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12432 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12434 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12435 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12436 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12437 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12439 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12440 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12441 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12442 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12444 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12445 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12446 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12449 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12450 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12451 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12452 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12453 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12454 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12455 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12456 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12458 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12459 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12460 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12461 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12462 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12463 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12464 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12465 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12467 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12468 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12469 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12470 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12472 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12473 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12474 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12475 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12477 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12478 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12480 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12481 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12482 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12483 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12484 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12485 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12487 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12488 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12489 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12490 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12492 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12493 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12494 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12495 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12496 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12497 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12498 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12499 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12501 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12502 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12503 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12505 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12506 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12508 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12509 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12510 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12511 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12512 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12513 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12515 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12516 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12517 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12518 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12519 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12520 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12522 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12523 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12524 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12525 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12527 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12529 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12530 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12531 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12534 static const struct builtin_description bdesc_1arg[] =
12536 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12537 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12539 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12543 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12544 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12545 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12546 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12548 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12549 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12550 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12551 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12553 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12555 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12556 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12558 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12559 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12560 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12561 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12562 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12564 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12566 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12567 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12569 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12570 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12571 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12573 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12577 ix86_init_builtins ()
12580 ix86_init_mmx_sse_builtins ();
12583 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12584 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12587 ix86_init_mmx_sse_builtins ()
12589 const struct builtin_description * d;
12592 tree pchar_type_node = build_pointer_type (char_type_node);
12593 tree pfloat_type_node = build_pointer_type (float_type_node);
12594 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12595 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12596 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12599 tree int_ftype_v4sf_v4sf
12600 = build_function_type_list (integer_type_node,
12601 V4SF_type_node, V4SF_type_node, NULL_TREE);
12602 tree v4si_ftype_v4sf_v4sf
12603 = build_function_type_list (V4SI_type_node,
12604 V4SF_type_node, V4SF_type_node, NULL_TREE);
12605 /* MMX/SSE/integer conversions. */
12606 tree int_ftype_v4sf
12607 = build_function_type_list (integer_type_node,
12608 V4SF_type_node, NULL_TREE);
12609 tree int_ftype_v8qi
12610 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12611 tree v4sf_ftype_v4sf_int
12612 = build_function_type_list (V4SF_type_node,
12613 V4SF_type_node, integer_type_node, NULL_TREE);
12614 tree v4sf_ftype_v4sf_v2si
12615 = build_function_type_list (V4SF_type_node,
12616 V4SF_type_node, V2SI_type_node, NULL_TREE);
12617 tree int_ftype_v4hi_int
12618 = build_function_type_list (integer_type_node,
12619 V4HI_type_node, integer_type_node, NULL_TREE);
12620 tree v4hi_ftype_v4hi_int_int
12621 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12622 integer_type_node, integer_type_node,
12624 /* Miscellaneous. */
12625 tree v8qi_ftype_v4hi_v4hi
12626 = build_function_type_list (V8QI_type_node,
12627 V4HI_type_node, V4HI_type_node, NULL_TREE);
12628 tree v4hi_ftype_v2si_v2si
12629 = build_function_type_list (V4HI_type_node,
12630 V2SI_type_node, V2SI_type_node, NULL_TREE);
12631 tree v4sf_ftype_v4sf_v4sf_int
12632 = build_function_type_list (V4SF_type_node,
12633 V4SF_type_node, V4SF_type_node,
12634 integer_type_node, NULL_TREE);
12635 tree v2si_ftype_v4hi_v4hi
12636 = build_function_type_list (V2SI_type_node,
12637 V4HI_type_node, V4HI_type_node, NULL_TREE);
12638 tree v4hi_ftype_v4hi_int
12639 = build_function_type_list (V4HI_type_node,
12640 V4HI_type_node, integer_type_node, NULL_TREE);
12641 tree v4hi_ftype_v4hi_di
12642 = build_function_type_list (V4HI_type_node,
12643 V4HI_type_node, long_long_unsigned_type_node,
12645 tree v2si_ftype_v2si_di
12646 = build_function_type_list (V2SI_type_node,
12647 V2SI_type_node, long_long_unsigned_type_node,
12649 tree void_ftype_void
12650 = build_function_type (void_type_node, void_list_node);
12651 tree void_ftype_unsigned
12652 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12653 tree unsigned_ftype_void
12654 = build_function_type (unsigned_type_node, void_list_node);
12656 = build_function_type (long_long_unsigned_type_node, void_list_node);
12657 tree v4sf_ftype_void
12658 = build_function_type (V4SF_type_node, void_list_node);
12659 tree v2si_ftype_v4sf
12660 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12661 /* Loads/stores. */
12662 tree void_ftype_v8qi_v8qi_pchar
12663 = build_function_type_list (void_type_node,
12664 V8QI_type_node, V8QI_type_node,
12665 pchar_type_node, NULL_TREE);
12666 tree v4sf_ftype_pfloat
12667 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12668 /* @@@ the type is bogus */
12669 tree v4sf_ftype_v4sf_pv2si
12670 = build_function_type_list (V4SF_type_node,
12671 V4SF_type_node, pv2si_type_node, NULL_TREE);
12672 tree void_ftype_pv2si_v4sf
12673 = build_function_type_list (void_type_node,
12674 pv2si_type_node, V4SF_type_node, NULL_TREE);
12675 tree void_ftype_pfloat_v4sf
12676 = build_function_type_list (void_type_node,
12677 pfloat_type_node, V4SF_type_node, NULL_TREE);
12678 tree void_ftype_pdi_di
12679 = build_function_type_list (void_type_node,
12680 pdi_type_node, long_long_unsigned_type_node,
12682 tree void_ftype_pv2di_v2di
12683 = build_function_type_list (void_type_node,
12684 pv2di_type_node, V2DI_type_node, NULL_TREE);
12685 /* Normal vector unops. */
12686 tree v4sf_ftype_v4sf
12687 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12689 /* Normal vector binops. */
12690 tree v4sf_ftype_v4sf_v4sf
12691 = build_function_type_list (V4SF_type_node,
12692 V4SF_type_node, V4SF_type_node, NULL_TREE);
12693 tree v8qi_ftype_v8qi_v8qi
12694 = build_function_type_list (V8QI_type_node,
12695 V8QI_type_node, V8QI_type_node, NULL_TREE);
12696 tree v4hi_ftype_v4hi_v4hi
12697 = build_function_type_list (V4HI_type_node,
12698 V4HI_type_node, V4HI_type_node, NULL_TREE);
12699 tree v2si_ftype_v2si_v2si
12700 = build_function_type_list (V2SI_type_node,
12701 V2SI_type_node, V2SI_type_node, NULL_TREE);
12702 tree di_ftype_di_di
12703 = build_function_type_list (long_long_unsigned_type_node,
12704 long_long_unsigned_type_node,
12705 long_long_unsigned_type_node, NULL_TREE);
12707 tree v2si_ftype_v2sf
12708 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12709 tree v2sf_ftype_v2si
12710 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12711 tree v2si_ftype_v2si
12712 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12713 tree v2sf_ftype_v2sf
12714 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12715 tree v2sf_ftype_v2sf_v2sf
12716 = build_function_type_list (V2SF_type_node,
12717 V2SF_type_node, V2SF_type_node, NULL_TREE);
12718 tree v2si_ftype_v2sf_v2sf
12719 = build_function_type_list (V2SI_type_node,
12720 V2SF_type_node, V2SF_type_node, NULL_TREE);
12721 tree pint_type_node = build_pointer_type (integer_type_node);
12722 tree pdouble_type_node = build_pointer_type (double_type_node);
12723 tree int_ftype_v2df_v2df
12724 = build_function_type_list (integer_type_node,
12725 V2DF_type_node, V2DF_type_node, NULL_TREE);
12728 = build_function_type (intTI_type_node, void_list_node);
12729 tree v2di_ftype_void
12730 = build_function_type (V2DI_type_node, void_list_node);
12731 tree ti_ftype_ti_ti
12732 = build_function_type_list (intTI_type_node,
12733 intTI_type_node, intTI_type_node, NULL_TREE);
12734 tree void_ftype_pvoid
12735 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12737 = build_function_type_list (V2DI_type_node,
12738 long_long_unsigned_type_node, NULL_TREE);
12740 = build_function_type_list (long_long_unsigned_type_node,
12741 V2DI_type_node, NULL_TREE);
12742 tree v4sf_ftype_v4si
12743 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12744 tree v4si_ftype_v4sf
12745 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12746 tree v2df_ftype_v4si
12747 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12748 tree v4si_ftype_v2df
12749 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12750 tree v2si_ftype_v2df
12751 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12752 tree v4sf_ftype_v2df
12753 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12754 tree v2df_ftype_v2si
12755 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12756 tree v2df_ftype_v4sf
12757 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12758 tree int_ftype_v2df
12759 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12760 tree v2df_ftype_v2df_int
12761 = build_function_type_list (V2DF_type_node,
12762 V2DF_type_node, integer_type_node, NULL_TREE);
12763 tree v4sf_ftype_v4sf_v2df
12764 = build_function_type_list (V4SF_type_node,
12765 V4SF_type_node, V2DF_type_node, NULL_TREE);
12766 tree v2df_ftype_v2df_v4sf
12767 = build_function_type_list (V2DF_type_node,
12768 V2DF_type_node, V4SF_type_node, NULL_TREE);
12769 tree v2df_ftype_v2df_v2df_int
12770 = build_function_type_list (V2DF_type_node,
12771 V2DF_type_node, V2DF_type_node,
12774 tree v2df_ftype_v2df_pv2si
12775 = build_function_type_list (V2DF_type_node,
12776 V2DF_type_node, pv2si_type_node, NULL_TREE);
12777 tree void_ftype_pv2si_v2df
12778 = build_function_type_list (void_type_node,
12779 pv2si_type_node, V2DF_type_node, NULL_TREE);
12780 tree void_ftype_pdouble_v2df
12781 = build_function_type_list (void_type_node,
12782 pdouble_type_node, V2DF_type_node, NULL_TREE);
12783 tree void_ftype_pint_int
12784 = build_function_type_list (void_type_node,
12785 pint_type_node, integer_type_node, NULL_TREE);
12786 tree void_ftype_v16qi_v16qi_pchar
12787 = build_function_type_list (void_type_node,
12788 V16QI_type_node, V16QI_type_node,
12789 pchar_type_node, NULL_TREE);
12790 tree v2df_ftype_pdouble
12791 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12792 tree v2df_ftype_v2df_v2df
12793 = build_function_type_list (V2DF_type_node,
12794 V2DF_type_node, V2DF_type_node, NULL_TREE);
12795 tree v16qi_ftype_v16qi_v16qi
12796 = build_function_type_list (V16QI_type_node,
12797 V16QI_type_node, V16QI_type_node, NULL_TREE);
12798 tree v8hi_ftype_v8hi_v8hi
12799 = build_function_type_list (V8HI_type_node,
12800 V8HI_type_node, V8HI_type_node, NULL_TREE);
12801 tree v4si_ftype_v4si_v4si
12802 = build_function_type_list (V4SI_type_node,
12803 V4SI_type_node, V4SI_type_node, NULL_TREE);
12804 tree v2di_ftype_v2di_v2di
12805 = build_function_type_list (V2DI_type_node,
12806 V2DI_type_node, V2DI_type_node, NULL_TREE);
12807 tree v2di_ftype_v2df_v2df
12808 = build_function_type_list (V2DI_type_node,
12809 V2DF_type_node, V2DF_type_node, NULL_TREE);
12810 tree v2df_ftype_v2df
12811 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12812 tree v2df_ftype_double
12813 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12814 tree v2df_ftype_double_double
12815 = build_function_type_list (V2DF_type_node,
12816 double_type_node, double_type_node, NULL_TREE);
12817 tree int_ftype_v8hi_int
12818 = build_function_type_list (integer_type_node,
12819 V8HI_type_node, integer_type_node, NULL_TREE);
12820 tree v8hi_ftype_v8hi_int_int
12821 = build_function_type_list (V8HI_type_node,
12822 V8HI_type_node, integer_type_node,
12823 integer_type_node, NULL_TREE);
12824 tree v2di_ftype_v2di_int
12825 = build_function_type_list (V2DI_type_node,
12826 V2DI_type_node, integer_type_node, NULL_TREE);
12827 tree v4si_ftype_v4si_int
12828 = build_function_type_list (V4SI_type_node,
12829 V4SI_type_node, integer_type_node, NULL_TREE);
12830 tree v8hi_ftype_v8hi_int
12831 = build_function_type_list (V8HI_type_node,
12832 V8HI_type_node, integer_type_node, NULL_TREE);
12833 tree v8hi_ftype_v8hi_v2di
12834 = build_function_type_list (V8HI_type_node,
12835 V8HI_type_node, V2DI_type_node, NULL_TREE);
12836 tree v4si_ftype_v4si_v2di
12837 = build_function_type_list (V4SI_type_node,
12838 V4SI_type_node, V2DI_type_node, NULL_TREE);
12839 tree v4si_ftype_v8hi_v8hi
12840 = build_function_type_list (V4SI_type_node,
12841 V8HI_type_node, V8HI_type_node, NULL_TREE);
12842 tree di_ftype_v8qi_v8qi
12843 = build_function_type_list (long_long_unsigned_type_node,
12844 V8QI_type_node, V8QI_type_node, NULL_TREE);
12845 tree v2di_ftype_v16qi_v16qi
12846 = build_function_type_list (V2DI_type_node,
12847 V16QI_type_node, V16QI_type_node, NULL_TREE);
12848 tree int_ftype_v16qi
12849 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12850 tree v16qi_ftype_pchar
12851 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12852 tree void_ftype_pchar_v16qi
12853 = build_function_type_list (void_type_node,
12854 pchar_type_node, V16QI_type_node, NULL_TREE);
12855 tree v4si_ftype_pchar
12856 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12857 tree void_ftype_pchar_v4si
12858 = build_function_type_list (void_type_node,
12859 pchar_type_node, V4SI_type_node, NULL_TREE);
12860 tree v2di_ftype_v2di
12861 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12863 /* Add all builtins that are more or less simple operations on two
12865 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12867 /* Use one of the operands; the target can have a different mode for
12868 mask-generating compares. */
12869 enum machine_mode mode;
12874 mode = insn_data[d->icode].operand[1].mode;
12879 type = v16qi_ftype_v16qi_v16qi;
12882 type = v8hi_ftype_v8hi_v8hi;
12885 type = v4si_ftype_v4si_v4si;
12888 type = v2di_ftype_v2di_v2di;
12891 type = v2df_ftype_v2df_v2df;
12894 type = ti_ftype_ti_ti;
12897 type = v4sf_ftype_v4sf_v4sf;
12900 type = v8qi_ftype_v8qi_v8qi;
12903 type = v4hi_ftype_v4hi_v4hi;
12906 type = v2si_ftype_v2si_v2si;
12909 type = di_ftype_di_di;
12916 /* Override for comparisons. */
12917 if (d->icode == CODE_FOR_maskcmpv4sf3
12918 || d->icode == CODE_FOR_maskncmpv4sf3
12919 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12920 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12921 type = v4si_ftype_v4sf_v4sf;
12923 if (d->icode == CODE_FOR_maskcmpv2df3
12924 || d->icode == CODE_FOR_maskncmpv2df3
12925 || d->icode == CODE_FOR_vmmaskcmpv2df3
12926 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12927 type = v2di_ftype_v2df_v2df;
12929 def_builtin (d->mask, d->name, type, d->code);
12932 /* Add the remaining MMX insns with somewhat more complicated types. */
12933 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12934 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12935 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12936 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12937 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12938 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12939 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12941 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12942 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12943 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12945 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12946 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12948 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12949 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12951 /* comi/ucomi insns. */
12952 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12953 if (d->mask == MASK_SSE2)
12954 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12956 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12958 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12959 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12960 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12962 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12963 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12964 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12965 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12966 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12967 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12969 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12970 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12972 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12974 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12975 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12976 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12977 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12978 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12979 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12981 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12982 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12983 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12984 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12986 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12987 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12988 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12989 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12991 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12993 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12995 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12996 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12997 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12998 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12999 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13000 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13002 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13004 /* Original 3DNow! */
13005 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13006 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13007 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13008 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13009 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13010 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13011 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13012 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13013 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13014 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13015 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13016 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13017 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13018 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13019 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13020 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13021 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13022 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13023 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13024 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13026 /* 3DNow! extension as used in the Athlon CPU. */
13027 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13028 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13029 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13030 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13031 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13032 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13034 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13037 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13038 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13040 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13041 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13042 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13044 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
13045 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
13046 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
13047 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13048 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13049 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13051 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13052 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13053 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13054 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13056 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13057 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13058 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13059 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13060 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13062 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13063 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13064 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13065 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13067 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13068 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13070 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13072 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13073 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13075 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13076 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13077 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13078 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13079 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13081 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13083 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13084 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13086 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13087 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13088 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13090 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13091 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13092 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13094 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13095 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13096 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13097 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
13098 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
13099 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13100 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13102 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
13103 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13104 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13106 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
13107 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
13108 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
13109 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13110 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13111 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
13112 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13114 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13116 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13117 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13118 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13120 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13121 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13122 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13124 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13125 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13127 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13128 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13129 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13130 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13132 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13133 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13134 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13135 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13137 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13138 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13140 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13143 /* Errors in the source file can cause expand_expr to return const0_rtx
13144 where we expect a vector. To avoid crashing, use one of the vector
13145 clear instructions. */
13147 safe_vector_operand (x, mode)
13149 enum machine_mode mode;
13151 if (x != const0_rtx)
13153 x = gen_reg_rtx (mode);
13155 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13156 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13157 : gen_rtx_SUBREG (DImode, x, 0)));
13159 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13160 : gen_rtx_SUBREG (V4SFmode, x, 0),
13161 CONST0_RTX (V4SFmode)));
13165 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13168 ix86_expand_binop_builtin (icode, arglist, target)
13169 enum insn_code icode;
13174 tree arg0 = TREE_VALUE (arglist);
13175 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13176 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13177 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13178 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13179 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13180 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13182 if (VECTOR_MODE_P (mode0))
13183 op0 = safe_vector_operand (op0, mode0);
13184 if (VECTOR_MODE_P (mode1))
13185 op1 = safe_vector_operand (op1, mode1);
13188 || GET_MODE (target) != tmode
13189 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13190 target = gen_reg_rtx (tmode);
13192 /* In case the insn wants input operands in modes different from
13193 the result, abort. */
13194 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13198 op0 = copy_to_mode_reg (mode0, op0);
13199 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13200 op1 = copy_to_mode_reg (mode1, op1);
13202 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13203 yet one of the two must not be a memory. This is normally enforced
13204 by expanders, but we didn't bother to create one here. */
13205 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13206 op0 = copy_to_mode_reg (mode0, op0);
13208 pat = GEN_FCN (icode) (target, op0, op1);
13215 /* Subroutine of ix86_expand_builtin to take care of stores. */
13218 ix86_expand_store_builtin (icode, arglist)
13219 enum insn_code icode;
13223 tree arg0 = TREE_VALUE (arglist);
13224 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13225 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13226 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13227 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13228 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13230 if (VECTOR_MODE_P (mode1))
13231 op1 = safe_vector_operand (op1, mode1);
13233 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13235 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13236 op1 = copy_to_mode_reg (mode1, op1);
13238 pat = GEN_FCN (icode) (op0, op1);
13244 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13247 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13248 enum insn_code icode;
13254 tree arg0 = TREE_VALUE (arglist);
13255 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13256 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13257 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13260 || GET_MODE (target) != tmode
13261 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13262 target = gen_reg_rtx (tmode);
13264 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13267 if (VECTOR_MODE_P (mode0))
13268 op0 = safe_vector_operand (op0, mode0);
13270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13271 op0 = copy_to_mode_reg (mode0, op0);
13274 pat = GEN_FCN (icode) (target, op0);
13281 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13282 sqrtss, rsqrtss, rcpss. */
13285 ix86_expand_unop1_builtin (icode, arglist, target)
13286 enum insn_code icode;
13291 tree arg0 = TREE_VALUE (arglist);
13292 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13293 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13294 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13297 || GET_MODE (target) != tmode
13298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13299 target = gen_reg_rtx (tmode);
13301 if (VECTOR_MODE_P (mode0))
13302 op0 = safe_vector_operand (op0, mode0);
13304 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13305 op0 = copy_to_mode_reg (mode0, op0);
13308 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13309 op1 = copy_to_mode_reg (mode0, op1);
13311 pat = GEN_FCN (icode) (target, op0, op1);
13318 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13321 ix86_expand_sse_compare (d, arglist, target)
13322 const struct builtin_description *d;
13327 tree arg0 = TREE_VALUE (arglist);
13328 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13329 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13330 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13332 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13333 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13334 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13335 enum rtx_code comparison = d->comparison;
13337 if (VECTOR_MODE_P (mode0))
13338 op0 = safe_vector_operand (op0, mode0);
13339 if (VECTOR_MODE_P (mode1))
13340 op1 = safe_vector_operand (op1, mode1);
13342 /* Swap operands if we have a comparison that isn't available in
13346 rtx tmp = gen_reg_rtx (mode1);
13347 emit_move_insn (tmp, op1);
13353 || GET_MODE (target) != tmode
13354 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13355 target = gen_reg_rtx (tmode);
13357 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13358 op0 = copy_to_mode_reg (mode0, op0);
13359 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13360 op1 = copy_to_mode_reg (mode1, op1);
13362 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13363 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13370 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13373 ix86_expand_sse_comi (d, arglist, target)
13374 const struct builtin_description *d;
13379 tree arg0 = TREE_VALUE (arglist);
13380 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13381 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13382 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13384 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13385 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13386 enum rtx_code comparison = d->comparison;
13388 if (VECTOR_MODE_P (mode0))
13389 op0 = safe_vector_operand (op0, mode0);
13390 if (VECTOR_MODE_P (mode1))
13391 op1 = safe_vector_operand (op1, mode1);
13393 /* Swap operands if we have a comparison that isn't available in
13402 target = gen_reg_rtx (SImode);
13403 emit_move_insn (target, const0_rtx);
13404 target = gen_rtx_SUBREG (QImode, target, 0);
13406 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13407 op0 = copy_to_mode_reg (mode0, op0);
13408 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13409 op1 = copy_to_mode_reg (mode1, op1);
13411 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13412 pat = GEN_FCN (d->icode) (op0, op1);
13416 emit_insn (gen_rtx_SET (VOIDmode,
13417 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13418 gen_rtx_fmt_ee (comparison, QImode,
13422 return SUBREG_REG (target);
13425 /* Expand an expression EXP that calls a built-in function,
13426 with result going to TARGET if that's convenient
13427 (and in mode MODE if that's convenient).
13428 SUBTARGET may be used as the target for computing one of EXP's operands.
13429 IGNORE is nonzero if the value is to be ignored. */
13432 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13435 rtx subtarget ATTRIBUTE_UNUSED;
13436 enum machine_mode mode ATTRIBUTE_UNUSED;
13437 int ignore ATTRIBUTE_UNUSED;
13439 const struct builtin_description *d;
13441 enum insn_code icode;
13442 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13443 tree arglist = TREE_OPERAND (exp, 1);
13444 tree arg0, arg1, arg2;
13445 rtx op0, op1, op2, pat;
13446 enum machine_mode tmode, mode0, mode1, mode2;
13447 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13451 case IX86_BUILTIN_EMMS:
13452 emit_insn (gen_emms ());
13455 case IX86_BUILTIN_SFENCE:
13456 emit_insn (gen_sfence ());
13459 case IX86_BUILTIN_PEXTRW:
13460 case IX86_BUILTIN_PEXTRW128:
13461 icode = (fcode == IX86_BUILTIN_PEXTRW
13462 ? CODE_FOR_mmx_pextrw
13463 : CODE_FOR_sse2_pextrw);
13464 arg0 = TREE_VALUE (arglist);
13465 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13466 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13467 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13468 tmode = insn_data[icode].operand[0].mode;
13469 mode0 = insn_data[icode].operand[1].mode;
13470 mode1 = insn_data[icode].operand[2].mode;
13472 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13473 op0 = copy_to_mode_reg (mode0, op0);
13474 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13476 /* @@@ better error message */
13477 error ("selector must be an immediate");
13478 return gen_reg_rtx (tmode);
13481 || GET_MODE (target) != tmode
13482 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13483 target = gen_reg_rtx (tmode);
13484 pat = GEN_FCN (icode) (target, op0, op1);
13490 case IX86_BUILTIN_PINSRW:
13491 case IX86_BUILTIN_PINSRW128:
13492 icode = (fcode == IX86_BUILTIN_PINSRW
13493 ? CODE_FOR_mmx_pinsrw
13494 : CODE_FOR_sse2_pinsrw);
13495 arg0 = TREE_VALUE (arglist);
13496 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13497 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13498 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13499 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13500 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13501 tmode = insn_data[icode].operand[0].mode;
13502 mode0 = insn_data[icode].operand[1].mode;
13503 mode1 = insn_data[icode].operand[2].mode;
13504 mode2 = insn_data[icode].operand[3].mode;
13506 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13507 op0 = copy_to_mode_reg (mode0, op0);
13508 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13509 op1 = copy_to_mode_reg (mode1, op1);
13510 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13512 /* @@@ better error message */
13513 error ("selector must be an immediate");
13517 || GET_MODE (target) != tmode
13518 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13519 target = gen_reg_rtx (tmode);
13520 pat = GEN_FCN (icode) (target, op0, op1, op2);
13526 case IX86_BUILTIN_MASKMOVQ:
13527 case IX86_BUILTIN_MASKMOVDQU:
13528 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13529 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13530 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13531 : CODE_FOR_sse2_maskmovdqu));
13532 /* Note the arg order is different from the operand order. */
13533 arg1 = TREE_VALUE (arglist);
13534 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13535 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13536 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13537 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13538 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13539 mode0 = insn_data[icode].operand[0].mode;
13540 mode1 = insn_data[icode].operand[1].mode;
13541 mode2 = insn_data[icode].operand[2].mode;
13543 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13544 op0 = copy_to_mode_reg (mode0, op0);
13545 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13546 op1 = copy_to_mode_reg (mode1, op1);
13547 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13548 op2 = copy_to_mode_reg (mode2, op2);
13549 pat = GEN_FCN (icode) (op0, op1, op2);
13555 case IX86_BUILTIN_SQRTSS:
13556 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13557 case IX86_BUILTIN_RSQRTSS:
13558 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13559 case IX86_BUILTIN_RCPSS:
13560 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13562 case IX86_BUILTIN_LOADAPS:
13563 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13565 case IX86_BUILTIN_LOADUPS:
13566 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13568 case IX86_BUILTIN_STOREAPS:
13569 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13571 case IX86_BUILTIN_STOREUPS:
13572 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13574 case IX86_BUILTIN_LOADSS:
13575 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13577 case IX86_BUILTIN_STORESS:
13578 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13580 case IX86_BUILTIN_LOADHPS:
13581 case IX86_BUILTIN_LOADLPS:
13582 case IX86_BUILTIN_LOADHPD:
13583 case IX86_BUILTIN_LOADLPD:
13584 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13585 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13586 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13587 : CODE_FOR_sse2_movlpd);
13588 arg0 = TREE_VALUE (arglist);
13589 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13590 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13591 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13592 tmode = insn_data[icode].operand[0].mode;
13593 mode0 = insn_data[icode].operand[1].mode;
13594 mode1 = insn_data[icode].operand[2].mode;
13596 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13597 op0 = copy_to_mode_reg (mode0, op0);
13598 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13600 || GET_MODE (target) != tmode
13601 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13602 target = gen_reg_rtx (tmode);
13603 pat = GEN_FCN (icode) (target, op0, op1);
13609 case IX86_BUILTIN_STOREHPS:
13610 case IX86_BUILTIN_STORELPS:
13611 case IX86_BUILTIN_STOREHPD:
13612 case IX86_BUILTIN_STORELPD:
13613 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13614 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13615 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13616 : CODE_FOR_sse2_movlpd);
13617 arg0 = TREE_VALUE (arglist);
13618 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13619 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13620 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13621 mode0 = insn_data[icode].operand[1].mode;
13622 mode1 = insn_data[icode].operand[2].mode;
13624 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13625 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13626 op1 = copy_to_mode_reg (mode1, op1);
13628 pat = GEN_FCN (icode) (op0, op0, op1);
13634 case IX86_BUILTIN_MOVNTPS:
13635 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13636 case IX86_BUILTIN_MOVNTQ:
13637 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13639 case IX86_BUILTIN_LDMXCSR:
13640 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13641 target = assign_386_stack_local (SImode, 0);
13642 emit_move_insn (target, op0);
13643 emit_insn (gen_ldmxcsr (target));
13646 case IX86_BUILTIN_STMXCSR:
13647 target = assign_386_stack_local (SImode, 0);
13648 emit_insn (gen_stmxcsr (target));
13649 return copy_to_mode_reg (SImode, target);
13651 case IX86_BUILTIN_SHUFPS:
13652 case IX86_BUILTIN_SHUFPD:
13653 icode = (fcode == IX86_BUILTIN_SHUFPS
13654 ? CODE_FOR_sse_shufps
13655 : CODE_FOR_sse2_shufpd);
13656 arg0 = TREE_VALUE (arglist);
13657 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13658 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13659 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13660 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13661 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13662 tmode = insn_data[icode].operand[0].mode;
13663 mode0 = insn_data[icode].operand[1].mode;
13664 mode1 = insn_data[icode].operand[2].mode;
13665 mode2 = insn_data[icode].operand[3].mode;
13667 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13668 op0 = copy_to_mode_reg (mode0, op0);
13669 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13670 op1 = copy_to_mode_reg (mode1, op1);
13671 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13673 /* @@@ better error message */
13674 error ("mask must be an immediate");
13675 return gen_reg_rtx (tmode);
13678 || GET_MODE (target) != tmode
13679 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13680 target = gen_reg_rtx (tmode);
13681 pat = GEN_FCN (icode) (target, op0, op1, op2);
13687 case IX86_BUILTIN_PSHUFW:
13688 case IX86_BUILTIN_PSHUFD:
13689 case IX86_BUILTIN_PSHUFHW:
13690 case IX86_BUILTIN_PSHUFLW:
13691 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13692 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13693 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13694 : CODE_FOR_mmx_pshufw);
13695 arg0 = TREE_VALUE (arglist);
13696 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13697 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13698 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13699 tmode = insn_data[icode].operand[0].mode;
13700 mode1 = insn_data[icode].operand[1].mode;
13701 mode2 = insn_data[icode].operand[2].mode;
13703 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13704 op0 = copy_to_mode_reg (mode1, op0);
13705 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13707 /* @@@ better error message */
13708 error ("mask must be an immediate");
13712 || GET_MODE (target) != tmode
13713 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13714 target = gen_reg_rtx (tmode);
13715 pat = GEN_FCN (icode) (target, op0, op1);
13721 case IX86_BUILTIN_PSLLDQI128:
13722 case IX86_BUILTIN_PSRLDQI128:
13723 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13724 : CODE_FOR_sse2_lshrti3);
13725 arg0 = TREE_VALUE (arglist);
13726 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13727 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13728 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13729 tmode = insn_data[icode].operand[0].mode;
13730 mode1 = insn_data[icode].operand[1].mode;
13731 mode2 = insn_data[icode].operand[2].mode;
13733 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13735 op0 = copy_to_reg (op0);
13736 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13738 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13740 error ("shift must be an immediate");
13743 target = gen_reg_rtx (V2DImode);
13744 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13750 case IX86_BUILTIN_FEMMS:
13751 emit_insn (gen_femms ());
13754 case IX86_BUILTIN_PAVGUSB:
13755 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13757 case IX86_BUILTIN_PF2ID:
13758 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13760 case IX86_BUILTIN_PFACC:
13761 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13763 case IX86_BUILTIN_PFADD:
13764 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13766 case IX86_BUILTIN_PFCMPEQ:
13767 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13769 case IX86_BUILTIN_PFCMPGE:
13770 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13772 case IX86_BUILTIN_PFCMPGT:
13773 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13775 case IX86_BUILTIN_PFMAX:
13776 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13778 case IX86_BUILTIN_PFMIN:
13779 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13781 case IX86_BUILTIN_PFMUL:
13782 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13784 case IX86_BUILTIN_PFRCP:
13785 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13787 case IX86_BUILTIN_PFRCPIT1:
13788 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13790 case IX86_BUILTIN_PFRCPIT2:
13791 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13793 case IX86_BUILTIN_PFRSQIT1:
13794 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13796 case IX86_BUILTIN_PFRSQRT:
13797 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13799 case IX86_BUILTIN_PFSUB:
13800 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13802 case IX86_BUILTIN_PFSUBR:
13803 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13805 case IX86_BUILTIN_PI2FD:
13806 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13808 case IX86_BUILTIN_PMULHRW:
13809 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13811 case IX86_BUILTIN_PF2IW:
13812 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13814 case IX86_BUILTIN_PFNACC:
13815 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13817 case IX86_BUILTIN_PFPNACC:
13818 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13820 case IX86_BUILTIN_PI2FW:
13821 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13823 case IX86_BUILTIN_PSWAPDSI:
13824 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13826 case IX86_BUILTIN_PSWAPDSF:
13827 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13829 case IX86_BUILTIN_SSE_ZERO:
13830 target = gen_reg_rtx (V4SFmode);
13831 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13834 case IX86_BUILTIN_MMX_ZERO:
13835 target = gen_reg_rtx (DImode);
13836 emit_insn (gen_mmx_clrdi (target));
13839 case IX86_BUILTIN_CLRTI:
13840 target = gen_reg_rtx (V2DImode);
13841 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13845 case IX86_BUILTIN_SQRTSD:
13846 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13847 case IX86_BUILTIN_LOADAPD:
13848 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13849 case IX86_BUILTIN_LOADUPD:
13850 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13852 case IX86_BUILTIN_STOREAPD:
13853 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13854 case IX86_BUILTIN_STOREUPD:
13855 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13857 case IX86_BUILTIN_LOADSD:
13858 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13860 case IX86_BUILTIN_STORESD:
13861 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13863 case IX86_BUILTIN_SETPD1:
13864 target = assign_386_stack_local (DFmode, 0);
13865 arg0 = TREE_VALUE (arglist);
13866 emit_move_insn (adjust_address (target, DFmode, 0),
13867 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13868 op0 = gen_reg_rtx (V2DFmode);
13869 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13870 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13873 case IX86_BUILTIN_SETPD:
13874 target = assign_386_stack_local (V2DFmode, 0);
13875 arg0 = TREE_VALUE (arglist);
13876 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13877 emit_move_insn (adjust_address (target, DFmode, 0),
13878 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13879 emit_move_insn (adjust_address (target, DFmode, 8),
13880 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13881 op0 = gen_reg_rtx (V2DFmode);
13882 emit_insn (gen_sse2_movapd (op0, target));
13885 case IX86_BUILTIN_LOADRPD:
13886 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13887 gen_reg_rtx (V2DFmode), 1);
13888 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13891 case IX86_BUILTIN_LOADPD1:
13892 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13893 gen_reg_rtx (V2DFmode), 1);
13894 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13897 case IX86_BUILTIN_STOREPD1:
13898 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13899 case IX86_BUILTIN_STORERPD:
13900 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13902 case IX86_BUILTIN_CLRPD:
13903 target = gen_reg_rtx (V2DFmode);
13904 emit_insn (gen_sse_clrv2df (target));
13907 case IX86_BUILTIN_MFENCE:
13908 emit_insn (gen_sse2_mfence ());
13910 case IX86_BUILTIN_LFENCE:
13911 emit_insn (gen_sse2_lfence ());
13914 case IX86_BUILTIN_CLFLUSH:
13915 arg0 = TREE_VALUE (arglist);
13916 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13917 icode = CODE_FOR_sse2_clflush;
13918 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13919 op0 = copy_to_mode_reg (Pmode, op0);
13921 emit_insn (gen_sse2_clflush (op0));
13924 case IX86_BUILTIN_MOVNTPD:
13925 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13926 case IX86_BUILTIN_MOVNTDQ:
13927 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13928 case IX86_BUILTIN_MOVNTI:
13929 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13931 case IX86_BUILTIN_LOADDQA:
13932 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13933 case IX86_BUILTIN_LOADDQU:
13934 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13935 case IX86_BUILTIN_LOADD:
13936 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13938 case IX86_BUILTIN_STOREDQA:
13939 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13940 case IX86_BUILTIN_STOREDQU:
13941 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13942 case IX86_BUILTIN_STORED:
13943 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13949 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13950 if (d->code == fcode)
13952 /* Compares are treated specially. */
13953 if (d->icode == CODE_FOR_maskcmpv4sf3
13954 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13955 || d->icode == CODE_FOR_maskncmpv4sf3
13956 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13957 || d->icode == CODE_FOR_maskcmpv2df3
13958 || d->icode == CODE_FOR_vmmaskcmpv2df3
13959 || d->icode == CODE_FOR_maskncmpv2df3
13960 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13961 return ix86_expand_sse_compare (d, arglist, target);
13963 return ix86_expand_binop_builtin (d->icode, arglist, target);
13966 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13967 if (d->code == fcode)
13968 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13970 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13971 if (d->code == fcode)
13972 return ix86_expand_sse_comi (d, arglist, target);
13974 /* @@@ Should really do something sensible here. */
13978 /* Store OPERAND to the memory after reload is completed. This means
13979 that we can't easily use assign_stack_local. */
13981 ix86_force_to_memory (mode, operand)
13982 enum machine_mode mode;
13986 if (!reload_completed)
13988 if (TARGET_64BIT && TARGET_RED_ZONE)
13990 result = gen_rtx_MEM (mode,
13991 gen_rtx_PLUS (Pmode,
13993 GEN_INT (-RED_ZONE_SIZE)));
13994 emit_move_insn (result, operand);
13996 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14002 operand = gen_lowpart (DImode, operand);
14006 gen_rtx_SET (VOIDmode,
14007 gen_rtx_MEM (DImode,
14008 gen_rtx_PRE_DEC (DImode,
14009 stack_pointer_rtx)),
14015 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14024 split_di (&operand, 1, operands, operands + 1);
14026 gen_rtx_SET (VOIDmode,
14027 gen_rtx_MEM (SImode,
14028 gen_rtx_PRE_DEC (Pmode,
14029 stack_pointer_rtx)),
14032 gen_rtx_SET (VOIDmode,
14033 gen_rtx_MEM (SImode,
14034 gen_rtx_PRE_DEC (Pmode,
14035 stack_pointer_rtx)),
14040 /* It is better to store HImodes as SImodes. */
14041 if (!TARGET_PARTIAL_REG_STALL)
14042 operand = gen_lowpart (SImode, operand);
14046 gen_rtx_SET (VOIDmode,
14047 gen_rtx_MEM (GET_MODE (operand),
14048 gen_rtx_PRE_DEC (SImode,
14049 stack_pointer_rtx)),
14055 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14060 /* Free operand from the memory. */
14062 ix86_free_from_memory (mode)
14063 enum machine_mode mode;
14065 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14069 if (mode == DImode || TARGET_64BIT)
14071 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14075 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14076 to pop or add instruction if registers are available. */
14077 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14078 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14083 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14084 QImode must go into class Q_REGS.
14085 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14086 movdf to do mem-to-mem moves through integer regs. */
14088 ix86_preferred_reload_class (x, class)
14090 enum reg_class class;
14092 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14094 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14096 /* SSE can't load any constant directly yet. */
14097 if (SSE_CLASS_P (class))
14099 /* Floats can load 0 and 1. */
14100 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14102 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14103 if (MAYBE_SSE_CLASS_P (class))
14104 return (reg_class_subset_p (class, GENERAL_REGS)
14105 ? GENERAL_REGS : FLOAT_REGS);
14109 /* General regs can load everything. */
14110 if (reg_class_subset_p (class, GENERAL_REGS))
14111 return GENERAL_REGS;
14112 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14113 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14116 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14118 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14123 /* If we are copying between general and FP registers, we need a memory
14124 location. The same is true for SSE and MMX registers.
14126 The macro can't work reliably when one of the CLASSES is class containing
14127 registers from multiple units (SSE, MMX, integer). We avoid this by never
14128 combining those units in single alternative in the machine description.
14129 Ensure that this constraint holds to avoid unexpected surprises.
14131 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14132 enforce these sanity checks. */
14134 ix86_secondary_memory_needed (class1, class2, mode, strict)
14135 enum reg_class class1, class2;
14136 enum machine_mode mode;
14139 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14140 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14141 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14142 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14143 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14144 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14151 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14152 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14153 && (mode) != SImode)
14154 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14155 && (mode) != SImode));
14157 /* Return the cost of moving data from a register in class CLASS1 to
14158 one in class CLASS2.
14160 It is not required that the cost always equal 2 when FROM is the same as TO;
14161 on some machines it is expensive to move between registers if they are not
14162 general registers. */
14164 ix86_register_move_cost (mode, class1, class2)
14165 enum machine_mode mode;
14166 enum reg_class class1, class2;
14168 /* In case we require secondary memory, compute cost of the store followed
14169 by load. In order to avoid bad register allocation choices, we need
14170 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14172 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14176 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14177 MEMORY_MOVE_COST (mode, class1, 1));
14178 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14179 MEMORY_MOVE_COST (mode, class2, 1));
14181 /* In case of copying from general_purpose_register we may emit multiple
14182 stores followed by single load causing memory size mismatch stall.
14183 Count this as arbitarily high cost of 20. */
14184 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14187 /* In the case of FP/MMX moves, the registers actually overlap, and we
14188 have to switch modes in order to treat them differently. */
14189 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14190 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14196 /* Moves between SSE/MMX and integer unit are expensive. */
14197 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14198 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14199 return ix86_cost->mmxsse_to_integer;
14200 if (MAYBE_FLOAT_CLASS_P (class1))
14201 return ix86_cost->fp_move;
14202 if (MAYBE_SSE_CLASS_P (class1))
14203 return ix86_cost->sse_move;
14204 if (MAYBE_MMX_CLASS_P (class1))
14205 return ix86_cost->mmx_move;
14209 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14211 ix86_hard_regno_mode_ok (regno, mode)
14213 enum machine_mode mode;
14215 /* Flags and only flags can only hold CCmode values. */
14216 if (CC_REGNO_P (regno))
14217 return GET_MODE_CLASS (mode) == MODE_CC;
14218 if (GET_MODE_CLASS (mode) == MODE_CC
14219 || GET_MODE_CLASS (mode) == MODE_RANDOM
14220 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14222 if (FP_REGNO_P (regno))
14223 return VALID_FP_MODE_P (mode);
14224 if (SSE_REGNO_P (regno))
14225 return VALID_SSE_REG_MODE (mode);
14226 if (MMX_REGNO_P (regno))
14227 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14228 /* We handle both integer and floats in the general purpose registers.
14229 In future we should be able to handle vector modes as well. */
14230 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14232 /* Take care for QImode values - they can be in non-QI regs, but then
14233 they do cause partial register stalls. */
14234 if (regno < 4 || mode != QImode || TARGET_64BIT)
14236 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14239 /* Return the cost of moving data of mode M between a
14240 register and memory. A value of 2 is the default; this cost is
14241 relative to those in `REGISTER_MOVE_COST'.
14243 If moving between registers and memory is more expensive than
14244 between two registers, you should define this macro to express the
14247 Model also increased moving costs of QImode registers in non
14251 ix86_memory_move_cost (mode, class, in)
14252 enum machine_mode mode;
14253 enum reg_class class;
14256 if (FLOAT_CLASS_P (class))
14274 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14276 if (SSE_CLASS_P (class))
14279 switch (GET_MODE_SIZE (mode))
14293 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14295 if (MMX_CLASS_P (class))
14298 switch (GET_MODE_SIZE (mode))
14309 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14311 switch (GET_MODE_SIZE (mode))
14315 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14316 : ix86_cost->movzbl_load);
14318 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14319 : ix86_cost->int_store[0] + 4);
14322 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14324 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14325 if (mode == TFmode)
14327 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14328 * ((int) GET_MODE_SIZE (mode)
14329 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14333 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14335 ix86_svr3_asm_out_constructor (symbol, priority)
14337 int priority ATTRIBUTE_UNUSED;
14340 fputs ("\tpushl $", asm_out_file);
14341 assemble_name (asm_out_file, XSTR (symbol, 0));
14342 fputc ('\n', asm_out_file);
14348 static int current_machopic_label_num;
14350 /* Given a symbol name and its associated stub, write out the
14351 definition of the stub. */
14354 machopic_output_stub (file, symb, stub)
14356 const char *symb, *stub;
14358 unsigned int length;
14359 char *binder_name, *symbol_name, lazy_ptr_name[32];
14360 int label = ++current_machopic_label_num;
14362 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14363 symb = (*targetm.strip_name_encoding) (symb);
14365 length = strlen (stub);
14366 binder_name = alloca (length + 32);
14367 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14369 length = strlen (symb);
14370 symbol_name = alloca (length + 32);
14371 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14373 sprintf (lazy_ptr_name, "L%d$lz", label);
14376 machopic_picsymbol_stub_section ();
14378 machopic_symbol_stub_section ();
14380 fprintf (file, "%s:\n", stub);
14381 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14385 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14386 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14387 fprintf (file, "\tjmp %%edx\n");
14390 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14392 fprintf (file, "%s:\n", binder_name);
14396 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14397 fprintf (file, "\tpushl %%eax\n");
14400 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14402 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14404 machopic_lazy_symbol_ptr_section ();
14405 fprintf (file, "%s:\n", lazy_ptr_name);
14406 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14407 fprintf (file, "\t.long %s\n", binder_name);
14409 #endif /* TARGET_MACHO */
14411 /* Order the registers for register allocator. */
14414 x86_order_regs_for_local_alloc ()
14419 /* First allocate the local general purpose registers. */
14420 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14421 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14422 reg_alloc_order [pos++] = i;
14424 /* Global general purpose registers. */
14425 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14426 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14427 reg_alloc_order [pos++] = i;
14429 /* x87 registers come first in case we are doing FP math
14431 if (!TARGET_SSE_MATH)
14432 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14433 reg_alloc_order [pos++] = i;
14435 /* SSE registers. */
14436 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14437 reg_alloc_order [pos++] = i;
14438 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14439 reg_alloc_order [pos++] = i;
14441 /* x87 registerts. */
14442 if (TARGET_SSE_MATH)
14443 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14444 reg_alloc_order [pos++] = i;
14446 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14447 reg_alloc_order [pos++] = i;
14449 /* Initialize the rest of array as we do not allocate some registers
14451 while (pos < FIRST_PSEUDO_REGISTER)
14452 reg_alloc_order [pos++] = 0;
14455 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14456 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14460 ix86_ms_bitfield_layout_p (record_type)
14461 tree record_type ATTRIBUTE_UNUSED;
14463 return TARGET_USE_MS_BITFIELD_LAYOUT;
14466 /* Returns an expression indicating where the this parameter is
14467 located on entry to the FUNCTION. */
14470 x86_this_parameter (function)
14473 tree type = TREE_TYPE (function);
14477 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14478 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14481 if (ix86_fntype_regparm (type) > 0)
14485 parm = TYPE_ARG_TYPES (type);
14486 /* Figure out whether or not the function has a variable number of
14488 for (; parm; parm = TREE_CHAIN (parm))
14489 if (TREE_VALUE (parm) == void_type_node)
14491 /* If not, the this parameter is in %eax. */
14493 return gen_rtx_REG (SImode, 0);
14496 if (aggregate_value_p (TREE_TYPE (type)))
14497 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14499 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14502 /* Determine whether x86_output_mi_thunk can succeed. */
14505 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14506 tree thunk ATTRIBUTE_UNUSED;
14507 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14508 HOST_WIDE_INT vcall_offset;
14511 /* 64-bit can handle anything. */
14515 /* For 32-bit, everything's fine if we have one free register. */
14516 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14519 /* Need a free register for vcall_offset. */
14523 /* Need a free register for GOT references. */
14524 if (flag_pic && !(*targetm.binds_local_p) (function))
14527 /* Otherwise ok. */
14531 /* Output the assembler code for a thunk function. THUNK_DECL is the
14532 declaration for the thunk function itself, FUNCTION is the decl for
14533 the target function. DELTA is an immediate constant offset to be
14534 added to THIS. If VCALL_OFFSET is nonzero, the word at
14535 *(*this + vcall_offset) should be added to THIS. */
14538 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14539 FILE *file ATTRIBUTE_UNUSED;
14540 tree thunk ATTRIBUTE_UNUSED;
14541 HOST_WIDE_INT delta;
14542 HOST_WIDE_INT vcall_offset;
14546 rtx this = x86_this_parameter (function);
14549 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14550 pull it in now and let DELTA benefit. */
14553 else if (vcall_offset)
14555 /* Put the this parameter into %eax. */
14557 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14558 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14561 this_reg = NULL_RTX;
14563 /* Adjust the this parameter by a fixed constant. */
14566 xops[0] = GEN_INT (delta);
14567 xops[1] = this_reg ? this_reg : this;
14570 if (!x86_64_general_operand (xops[0], DImode))
14572 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14574 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14578 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14581 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14584 /* Adjust the this parameter by a value stored in the vtable. */
14588 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14590 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14592 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14595 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14597 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14599 /* Adjust the this parameter. */
14600 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14601 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14603 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14604 xops[0] = GEN_INT (vcall_offset);
14606 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14607 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14609 xops[1] = this_reg;
14611 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14613 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14616 /* If necessary, drop THIS back to its stack slot. */
14617 if (this_reg && this_reg != this)
14619 xops[0] = this_reg;
14621 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14624 xops[0] = DECL_RTL (function);
14627 if (!flag_pic || (*targetm.binds_local_p) (function))
14628 output_asm_insn ("jmp\t%P0", xops);
14631 tmp = XEXP (xops[0], 0);
14632 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14633 tmp = gen_rtx_CONST (Pmode, tmp);
14634 tmp = gen_rtx_MEM (QImode, tmp);
14636 output_asm_insn ("jmp\t%A0", xops);
14641 if (!flag_pic || (*targetm.binds_local_p) (function))
14642 output_asm_insn ("jmp\t%P0", xops);
14645 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14646 output_set_got (tmp);
14649 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14650 output_asm_insn ("jmp\t{*}%1", xops);
14656 x86_field_alignment (field, computed)
14660 enum machine_mode mode;
14661 tree type = TREE_TYPE (field);
14663 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14665 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14666 ? get_inner_array_type (type) : type);
14667 if (mode == DFmode || mode == DCmode
14668 || GET_MODE_CLASS (mode) == MODE_INT
14669 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14670 return MIN (32, computed);
14674 /* Output assembler code to FILE to increment profiler label # LABELNO
14675 for profiling a function entry. */
14677 x86_function_profiler (file, labelno)
14684 #ifndef NO_PROFILE_COUNTERS
14685 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14687 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14691 #ifndef NO_PROFILE_COUNTERS
14692 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14694 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14698 #ifndef NO_PROFILE_COUNTERS
14699 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14700 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14702 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14706 #ifndef NO_PROFILE_COUNTERS
14707 fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno,
14708 PROFILE_COUNT_REGISTER);
14710 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14714 /* Implement machine specific optimizations.
14715 At the moment we implement single transformation: AMD Athlon works faster
14716 when RET is not destination of conditional jump or directly preceeded
14717 by other jump instruction. We avoid the penalty by inserting NOP just
14718 before the RET instructions in such cases. */
14720 x86_machine_dependent_reorg (first)
14721 rtx first ATTRIBUTE_UNUSED;
14725 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
14727 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14729 basic_block bb = e->src;
14732 bool insert = false;
14734 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14736 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14737 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14739 if (prev && GET_CODE (prev) == CODE_LABEL)
14742 for (e = bb->pred; e; e = e->pred_next)
14743 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14744 && !(e->flags & EDGE_FALLTHRU))
14749 prev = prev_active_insn (ret);
14750 if (prev && GET_CODE (prev) == JUMP_INSN
14751 && any_condjump_p (prev))
14753 /* Empty functions get branch misspredict even when the jump destination
14754 is not visible to us. */
14755 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14759 emit_insn_before (gen_nop (), ret);
14763 /* Return nonzero when QImode register that must be represented via REX prefix
14766 x86_extended_QIreg_mentioned_p (insn)
14770 extract_insn_cached (insn);
14771 for (i = 0; i < recog_data.n_operands; i++)
14772 if (REG_P (recog_data.operand[i])
14773 && REGNO (recog_data.operand[i]) >= 4)
14778 /* Return nonzero when P points to register encoded via REX prefix.
14779 Called via for_each_rtx. */
14781 extended_reg_mentioned_1 (p, data)
14783 void *data ATTRIBUTE_UNUSED;
14785 unsigned int regno;
14788 regno = REGNO (*p);
14789 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14792 /* Return true when INSN mentions register that must be encoded using REX
14795 x86_extended_reg_mentioned_p (insn)
14798 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14801 #include "gt-i386.h"