1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
72 struct processor_costs size_cost = { /* costs for tuning for size */
73 COSTS_N_BYTES (2), /* cost of an add instruction */
74 COSTS_N_BYTES (3), /* cost of a lea instruction */
75 COSTS_N_BYTES (2), /* variable shift costs */
76 COSTS_N_BYTES (3), /* constant shift costs */
77 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
78 COSTS_N_BYTES (3), /* HI */
79 COSTS_N_BYTES (3), /* SI */
80 COSTS_N_BYTES (3), /* DI */
81 COSTS_N_BYTES (5)}, /* other */
82 0, /* cost of multiply per each bit set */
83 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 COSTS_N_BYTES (3), /* cost of movsx */
89 COSTS_N_BYTES (3), /* cost of movzx */
92 2, /* cost for loading QImode using movzbl */
93 {2, 2, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 2, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {2, 2, 2}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {2, 2, 2}, /* cost of storing fp registers
101 in SFmode, DFmode and XFmode */
102 3, /* cost of moving MMX register */
103 {3, 3}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {3, 3}, /* cost of storing MMX registers
106 in SImode and DImode */
107 3, /* cost of moving SSE register */
108 {3, 3, 3}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {3, 3, 3}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 0, /* size of prefetch block */
114 0, /* number of parallel prefetches */
116 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
117 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
118 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
119 COSTS_N_BYTES (2), /* cost of FABS instruction. */
120 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
121 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 /* Processor costs (relative to an add) */
126 struct processor_costs i386_cost = { /* 386 specific costs */
127 COSTS_N_INSNS (1), /* cost of an add instruction */
128 COSTS_N_INSNS (1), /* cost of a lea instruction */
129 COSTS_N_INSNS (3), /* variable shift costs */
130 COSTS_N_INSNS (2), /* constant shift costs */
131 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
132 COSTS_N_INSNS (6), /* HI */
133 COSTS_N_INSNS (6), /* SI */
134 COSTS_N_INSNS (6), /* DI */
135 COSTS_N_INSNS (6)}, /* other */
136 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
137 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
138 COSTS_N_INSNS (23), /* HI */
139 COSTS_N_INSNS (23), /* SI */
140 COSTS_N_INSNS (23), /* DI */
141 COSTS_N_INSNS (23)}, /* other */
142 COSTS_N_INSNS (3), /* cost of movsx */
143 COSTS_N_INSNS (2), /* cost of movzx */
144 15, /* "large" insn */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, /* cost of moving SSE register */
162 {4, 8, 16}, /* cost of loading SSE registers
163 in SImode, DImode and TImode */
164 {4, 8, 16}, /* cost of storing SSE registers
165 in SImode, DImode and TImode */
166 3, /* MMX or SSE register to integer */
167 0, /* size of prefetch block */
168 0, /* number of parallel prefetches */
170 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
171 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
172 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
173 COSTS_N_INSNS (22), /* cost of FABS instruction. */
174 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
175 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
179 struct processor_costs i486_cost = { /* 486 specific costs */
180 COSTS_N_INSNS (1), /* cost of an add instruction */
181 COSTS_N_INSNS (1), /* cost of a lea instruction */
182 COSTS_N_INSNS (3), /* variable shift costs */
183 COSTS_N_INSNS (2), /* constant shift costs */
184 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
185 COSTS_N_INSNS (12), /* HI */
186 COSTS_N_INSNS (12), /* SI */
187 COSTS_N_INSNS (12), /* DI */
188 COSTS_N_INSNS (12)}, /* other */
189 1, /* cost of multiply per each bit set */
190 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
191 COSTS_N_INSNS (40), /* HI */
192 COSTS_N_INSNS (40), /* SI */
193 COSTS_N_INSNS (40), /* DI */
194 COSTS_N_INSNS (40)}, /* other */
195 COSTS_N_INSNS (3), /* cost of movsx */
196 COSTS_N_INSNS (2), /* cost of movzx */
197 15, /* "large" insn */
199 4, /* cost for loading QImode using movzbl */
200 {2, 4, 2}, /* cost of loading integer registers
201 in QImode, HImode and SImode.
202 Relative to reg-reg move (2). */
203 {2, 4, 2}, /* cost of storing integer registers */
204 2, /* cost of reg,reg fld/fst */
205 {8, 8, 8}, /* cost of loading fp registers
206 in SFmode, DFmode and XFmode */
207 {8, 8, 8}, /* cost of storing fp registers
208 in SFmode, DFmode and XFmode */
209 2, /* cost of moving MMX register */
210 {4, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {4, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
223 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
224 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
225 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
226 COSTS_N_INSNS (3), /* cost of FABS instruction. */
227 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
228 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
232 struct processor_costs pentium_cost = {
233 COSTS_N_INSNS (1), /* cost of an add instruction */
234 COSTS_N_INSNS (1), /* cost of a lea instruction */
235 COSTS_N_INSNS (4), /* variable shift costs */
236 COSTS_N_INSNS (1), /* constant shift costs */
237 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
238 COSTS_N_INSNS (11), /* HI */
239 COSTS_N_INSNS (11), /* SI */
240 COSTS_N_INSNS (11), /* DI */
241 COSTS_N_INSNS (11)}, /* other */
242 0, /* cost of multiply per each bit set */
243 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
244 COSTS_N_INSNS (25), /* HI */
245 COSTS_N_INSNS (25), /* SI */
246 COSTS_N_INSNS (25), /* DI */
247 COSTS_N_INSNS (25)}, /* other */
248 COSTS_N_INSNS (3), /* cost of movsx */
249 COSTS_N_INSNS (2), /* cost of movzx */
250 8, /* "large" insn */
252 6, /* cost for loading QImode using movzbl */
253 {2, 4, 2}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 4, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers
261 in SFmode, DFmode and XFmode */
262 8, /* cost of moving MMX register */
263 {8, 8}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {8, 8}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {4, 8, 16}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {4, 8, 16}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
276 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (1), /* cost of FABS instruction. */
280 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
285 struct processor_costs pentiumpro_cost = {
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (1), /* variable shift costs */
289 COSTS_N_INSNS (1), /* constant shift costs */
290 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (4), /* HI */
292 COSTS_N_INSNS (4), /* SI */
293 COSTS_N_INSNS (4), /* DI */
294 COSTS_N_INSNS (4)}, /* other */
295 0, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (17), /* HI */
298 COSTS_N_INSNS (17), /* SI */
299 COSTS_N_INSNS (17), /* DI */
300 COSTS_N_INSNS (17)}, /* other */
301 COSTS_N_INSNS (1), /* cost of movsx */
302 COSTS_N_INSNS (1), /* cost of movzx */
303 8, /* "large" insn */
305 2, /* cost for loading QImode using movzbl */
306 {4, 4, 4}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 2, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {2, 2, 6}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {4, 4, 6}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {2, 2}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {2, 2}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {2, 2, 8}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {2, 2, 8}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 32, /* size of prefetch block */
327 6, /* number of parallel prefetches */
329 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
330 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
331 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
332 COSTS_N_INSNS (2), /* cost of FABS instruction. */
333 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
334 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
338 struct processor_costs geode_cost = {
339 COSTS_N_INSNS (1), /* cost of an add instruction */
340 COSTS_N_INSNS (1), /* cost of a lea instruction */
341 COSTS_N_INSNS (2), /* variable shift costs */
342 COSTS_N_INSNS (1), /* constant shift costs */
343 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
344 COSTS_N_INSNS (4), /* HI */
345 COSTS_N_INSNS (7), /* SI */
346 COSTS_N_INSNS (7), /* DI */
347 COSTS_N_INSNS (7)}, /* other */
348 0, /* cost of multiply per each bit set */
349 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
350 COSTS_N_INSNS (23), /* HI */
351 COSTS_N_INSNS (39), /* SI */
352 COSTS_N_INSNS (39), /* DI */
353 COSTS_N_INSNS (39)}, /* other */
354 COSTS_N_INSNS (1), /* cost of movsx */
355 COSTS_N_INSNS (1), /* cost of movzx */
356 8, /* "large" insn */
358 1, /* cost for loading QImode using movzbl */
359 {1, 1, 1}, /* cost of loading integer registers
360 in QImode, HImode and SImode.
361 Relative to reg-reg move (2). */
362 {1, 1, 1}, /* cost of storing integer registers */
363 1, /* cost of reg,reg fld/fst */
364 {1, 1, 1}, /* cost of loading fp registers
365 in SFmode, DFmode and XFmode */
366 {4, 6, 6}, /* cost of storing fp registers
367 in SFmode, DFmode and XFmode */
369 1, /* cost of moving MMX register */
370 {1, 1}, /* cost of loading MMX registers
371 in SImode and DImode */
372 {1, 1}, /* cost of storing MMX registers
373 in SImode and DImode */
374 1, /* cost of moving SSE register */
375 {1, 1, 1}, /* cost of loading SSE registers
376 in SImode, DImode and TImode */
377 {1, 1, 1}, /* cost of storing SSE registers
378 in SImode, DImode and TImode */
379 1, /* MMX or SSE register to integer */
380 32, /* size of prefetch block */
381 1, /* number of parallel prefetches */
383 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (1), /* cost of FABS instruction. */
387 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
392 struct processor_costs k6_cost = {
393 COSTS_N_INSNS (1), /* cost of an add instruction */
394 COSTS_N_INSNS (2), /* cost of a lea instruction */
395 COSTS_N_INSNS (1), /* variable shift costs */
396 COSTS_N_INSNS (1), /* constant shift costs */
397 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
398 COSTS_N_INSNS (3), /* HI */
399 COSTS_N_INSNS (3), /* SI */
400 COSTS_N_INSNS (3), /* DI */
401 COSTS_N_INSNS (3)}, /* other */
402 0, /* cost of multiply per each bit set */
403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
404 COSTS_N_INSNS (18), /* HI */
405 COSTS_N_INSNS (18), /* SI */
406 COSTS_N_INSNS (18), /* DI */
407 COSTS_N_INSNS (18)}, /* other */
408 COSTS_N_INSNS (2), /* cost of movsx */
409 COSTS_N_INSNS (2), /* cost of movzx */
410 8, /* "large" insn */
412 3, /* cost for loading QImode using movzbl */
413 {4, 5, 4}, /* cost of loading integer registers
414 in QImode, HImode and SImode.
415 Relative to reg-reg move (2). */
416 {2, 3, 2}, /* cost of storing integer registers */
417 4, /* cost of reg,reg fld/fst */
418 {6, 6, 6}, /* cost of loading fp registers
419 in SFmode, DFmode and XFmode */
420 {4, 4, 4}, /* cost of storing fp registers
421 in SFmode, DFmode and XFmode */
422 2, /* cost of moving MMX register */
423 {2, 2}, /* cost of loading MMX registers
424 in SImode and DImode */
425 {2, 2}, /* cost of storing MMX registers
426 in SImode and DImode */
427 2, /* cost of moving SSE register */
428 {2, 2, 8}, /* cost of loading SSE registers
429 in SImode, DImode and TImode */
430 {2, 2, 8}, /* cost of storing SSE registers
431 in SImode, DImode and TImode */
432 6, /* MMX or SSE register to integer */
433 32, /* size of prefetch block */
434 1, /* number of parallel prefetches */
436 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
437 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
438 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
439 COSTS_N_INSNS (2), /* cost of FABS instruction. */
440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
441 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
445 struct processor_costs athlon_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (2), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (5), /* HI */
452 COSTS_N_INSNS (5), /* SI */
453 COSTS_N_INSNS (5), /* DI */
454 COSTS_N_INSNS (5)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (26), /* HI */
458 COSTS_N_INSNS (42), /* SI */
459 COSTS_N_INSNS (74), /* DI */
460 COSTS_N_INSNS (74)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
465 4, /* cost for loading QImode using movzbl */
466 {3, 4, 3}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {3, 4, 3}, /* cost of storing integer registers */
470 4, /* cost of reg,reg fld/fst */
471 {4, 4, 12}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {6, 6, 8}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {4, 4}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {4, 4}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {4, 4, 6}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {4, 4, 5}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 5, /* MMX or SSE register to integer */
486 64, /* size of prefetch block */
487 6, /* number of parallel prefetches */
489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
491 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
492 COSTS_N_INSNS (2), /* cost of FABS instruction. */
493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
498 struct processor_costs k8_cost = {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (2), /* cost of a lea instruction */
501 COSTS_N_INSNS (1), /* variable shift costs */
502 COSTS_N_INSNS (1), /* constant shift costs */
503 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (4), /* HI */
505 COSTS_N_INSNS (3), /* SI */
506 COSTS_N_INSNS (4), /* DI */
507 COSTS_N_INSNS (5)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (26), /* HI */
511 COSTS_N_INSNS (42), /* SI */
512 COSTS_N_INSNS (74), /* DI */
513 COSTS_N_INSNS (74)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 8, /* "large" insn */
518 4, /* cost for loading QImode using movzbl */
519 {3, 4, 3}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {3, 4, 3}, /* cost of storing integer registers */
523 4, /* cost of reg,reg fld/fst */
524 {4, 4, 12}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {6, 6, 8}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
528 2, /* cost of moving MMX register */
529 {3, 3}, /* cost of loading MMX registers
530 in SImode and DImode */
531 {4, 4}, /* cost of storing MMX registers
532 in SImode and DImode */
533 2, /* cost of moving SSE register */
534 {4, 3, 6}, /* cost of loading SSE registers
535 in SImode, DImode and TImode */
536 {4, 4, 5}, /* cost of storing SSE registers
537 in SImode, DImode and TImode */
538 5, /* MMX or SSE register to integer */
539 64, /* size of prefetch block */
540 /* New AMD processors never drop prefetches; if they cannot be performed
541 immediately, they are queued. We set number of simultaneous prefetches
542 to a large constant to reflect this (it probably is not a good idea not
543 to limit number of prefetches at all, as their execution also takes some
545 100, /* number of parallel prefetches */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
556 struct processor_costs pentium4_cost = {
557 COSTS_N_INSNS (1), /* cost of an add instruction */
558 COSTS_N_INSNS (3), /* cost of a lea instruction */
559 COSTS_N_INSNS (4), /* variable shift costs */
560 COSTS_N_INSNS (4), /* constant shift costs */
561 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
562 COSTS_N_INSNS (15), /* HI */
563 COSTS_N_INSNS (15), /* SI */
564 COSTS_N_INSNS (15), /* DI */
565 COSTS_N_INSNS (15)}, /* other */
566 0, /* cost of multiply per each bit set */
567 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
568 COSTS_N_INSNS (56), /* HI */
569 COSTS_N_INSNS (56), /* SI */
570 COSTS_N_INSNS (56), /* DI */
571 COSTS_N_INSNS (56)}, /* other */
572 COSTS_N_INSNS (1), /* cost of movsx */
573 COSTS_N_INSNS (1), /* cost of movzx */
574 16, /* "large" insn */
576 2, /* cost for loading QImode using movzbl */
577 {4, 5, 4}, /* cost of loading integer registers
578 in QImode, HImode and SImode.
579 Relative to reg-reg move (2). */
580 {2, 3, 2}, /* cost of storing integer registers */
581 2, /* cost of reg,reg fld/fst */
582 {2, 2, 6}, /* cost of loading fp registers
583 in SFmode, DFmode and XFmode */
584 {4, 4, 6}, /* cost of storing fp registers
585 in SFmode, DFmode and XFmode */
586 2, /* cost of moving MMX register */
587 {2, 2}, /* cost of loading MMX registers
588 in SImode and DImode */
589 {2, 2}, /* cost of storing MMX registers
590 in SImode and DImode */
591 12, /* cost of moving SSE register */
592 {12, 12, 12}, /* cost of loading SSE registers
593 in SImode, DImode and TImode */
594 {2, 2, 8}, /* cost of storing SSE registers
595 in SImode, DImode and TImode */
596 10, /* MMX or SSE register to integer */
597 64, /* size of prefetch block */
598 6, /* number of parallel prefetches */
600 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
601 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
602 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
603 COSTS_N_INSNS (2), /* cost of FABS instruction. */
604 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
605 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
609 struct processor_costs nocona_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (1), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (10), /* HI */
616 COSTS_N_INSNS (10), /* SI */
617 COSTS_N_INSNS (10), /* DI */
618 COSTS_N_INSNS (10)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (66), /* HI */
622 COSTS_N_INSNS (66), /* SI */
623 COSTS_N_INSNS (66), /* DI */
624 COSTS_N_INSNS (66)}, /* other */
625 COSTS_N_INSNS (1), /* cost of movsx */
626 COSTS_N_INSNS (1), /* cost of movzx */
627 16, /* "large" insn */
629 4, /* cost for loading QImode using movzbl */
630 {4, 4, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {4, 4, 4}, /* cost of storing integer registers */
634 3, /* cost of reg,reg fld/fst */
635 {12, 12, 12}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 6, /* cost of moving MMX register */
640 {12, 12}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {12, 12}, /* cost of storing MMX registers
643 in SImode and DImode */
644 6, /* cost of moving SSE register */
645 {12, 12, 12}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {12, 12, 12}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 8, /* MMX or SSE register to integer */
650 128, /* size of prefetch block */
651 8, /* number of parallel prefetches */
653 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (3), /* cost of FABS instruction. */
657 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
661 /* Generic64 should produce code tuned for Nocona and K8. */
663 struct processor_costs generic64_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 /* On all chips taken into consideration lea is 2 cycles and more. With
666 this cost however our current implementation of synth_mult results in
667 use of unnecessary temporary registers causing regression on several
668 SPECfp benchmarks. */
669 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
670 COSTS_N_INSNS (1), /* variable shift costs */
671 COSTS_N_INSNS (1), /* constant shift costs */
672 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
673 COSTS_N_INSNS (4), /* HI */
674 COSTS_N_INSNS (3), /* SI */
675 COSTS_N_INSNS (4), /* DI */
676 COSTS_N_INSNS (2)}, /* other */
677 0, /* cost of multiply per each bit set */
678 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
679 COSTS_N_INSNS (26), /* HI */
680 COSTS_N_INSNS (42), /* SI */
681 COSTS_N_INSNS (74), /* DI */
682 COSTS_N_INSNS (74)}, /* other */
683 COSTS_N_INSNS (1), /* cost of movsx */
684 COSTS_N_INSNS (1), /* cost of movzx */
685 8, /* "large" insn */
687 4, /* cost for loading QImode using movzbl */
688 {4, 4, 4}, /* cost of loading integer registers
689 in QImode, HImode and SImode.
690 Relative to reg-reg move (2). */
691 {4, 4, 4}, /* cost of storing integer registers */
692 4, /* cost of reg,reg fld/fst */
693 {12, 12, 12}, /* cost of loading fp registers
694 in SFmode, DFmode and XFmode */
695 {6, 6, 8}, /* cost of storing fp registers
696 in SFmode, DFmode and XFmode */
697 2, /* cost of moving MMX register */
698 {8, 8}, /* cost of loading MMX registers
699 in SImode and DImode */
700 {8, 8}, /* cost of storing MMX registers
701 in SImode and DImode */
702 2, /* cost of moving SSE register */
703 {8, 8, 8}, /* cost of loading SSE registers
704 in SImode, DImode and TImode */
705 {8, 8, 8}, /* cost of storing SSE registers
706 in SImode, DImode and TImode */
707 5, /* MMX or SSE register to integer */
708 64, /* size of prefetch block */
709 6, /* number of parallel prefetches */
710 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
711 is increased to perhaps more appropriate value of 5. */
713 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
714 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
715 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
716 COSTS_N_INSNS (8), /* cost of FABS instruction. */
717 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
718 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
721 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
723 struct processor_costs generic32_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (4), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (4), /* DI */
732 COSTS_N_INSNS (2)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (26), /* HI */
736 COSTS_N_INSNS (42), /* SI */
737 COSTS_N_INSNS (74), /* DI */
738 COSTS_N_INSNS (74)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
743 4, /* cost for loading QImode using movzbl */
744 {4, 4, 4}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 4, /* cost of reg,reg fld/fst */
749 {12, 12, 12}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
751 {6, 6, 8}, /* cost of storing fp registers
752 in SFmode, DFmode and XFmode */
753 2, /* cost of moving MMX register */
754 {8, 8}, /* cost of loading MMX registers
755 in SImode and DImode */
756 {8, 8}, /* cost of storing MMX registers
757 in SImode and DImode */
758 2, /* cost of moving SSE register */
759 {8, 8, 8}, /* cost of loading SSE registers
760 in SImode, DImode and TImode */
761 {8, 8, 8}, /* cost of storing SSE registers
762 in SImode, DImode and TImode */
763 5, /* MMX or SSE register to integer */
764 64, /* size of prefetch block */
765 6, /* number of parallel prefetches */
767 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (8), /* cost of FABS instruction. */
771 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
775 const struct processor_costs *ix86_cost = &pentium_cost;
777 /* Processor feature/optimization bitmasks. */
778 #define m_386 (1<<PROCESSOR_I386)
779 #define m_486 (1<<PROCESSOR_I486)
780 #define m_PENT (1<<PROCESSOR_PENTIUM)
781 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
782 #define m_GEODE (1<<PROCESSOR_GEODE)
783 #define m_K6_GEODE (m_K6 | m_GEODE)
784 #define m_K6 (1<<PROCESSOR_K6)
785 #define m_ATHLON (1<<PROCESSOR_ATHLON)
786 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
787 #define m_K8 (1<<PROCESSOR_K8)
788 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
789 #define m_NOCONA (1<<PROCESSOR_NOCONA)
790 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
791 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
792 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
794 /* Generic instruction choice should be common subset of supported CPUs
795 (PPro/PENT4/NOCONA/Athlon/K8). */
797 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
798 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
799 generic because it is not working well with PPro base chips. */
800 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC64;
801 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
802 const int x86_zero_extend_with_and = m_486 | m_PENT;
803 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
804 const int x86_double_with_add = ~m_386;
805 const int x86_use_bit_test = m_386;
806 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
807 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
808 const int x86_3dnow_a = m_ATHLON_K8;
809 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
810 /* Branch hints were put in P4 based on simulation result. But
811 after P4 was made, no performance benefit was observed with
812 branch hints. It also increases the code size. As the result,
813 icc never generates branch hints. */
814 const int x86_branch_hints = 0;
815 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
816 /* We probably ought to watch for partial register stalls on Generic32
817 compilation setting as well. However in current implementation the
818 partial register stalls are not eliminated very well - they can
819 be introduced via subregs synthesized by combine and can happen
820 in caller/callee saving sequences.
821 Because this option pays back little on PPro based chips and is in conflict
822 with partial reg. dependencies used by Athlon/P4 based chips, it is better
823 to leave it off for generic32 for now. */
824 const int x86_partial_reg_stall = m_PPRO;
825 const int x86_partial_flag_reg_stall = m_GENERIC;
826 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
827 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
828 const int x86_use_mov0 = m_K6;
829 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
830 const int x86_read_modify_write = ~m_PENT;
831 const int x86_read_modify = ~(m_PENT | m_PPRO);
832 const int x86_split_long_moves = m_PPRO;
833 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
834 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
835 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
836 const int x86_qimode_math = ~(0);
837 const int x86_promote_qi_regs = 0;
838 /* On PPro this flag is meant to avoid partial register stalls. Just like
839 the x86_partial_reg_stall this option might be considered for Generic32
840 if our scheme for avoiding partial stalls was more effective. */
841 const int x86_himode_math = ~(m_PPRO);
842 const int x86_promote_hi_regs = m_PPRO;
843 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
844 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
845 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC;
846 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
847 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_GEODE);
848 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
849 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
850 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
851 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
852 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
853 const int x86_shift1 = ~m_486;
854 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
855 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
856 that thread 128bit SSE registers as single units versus K8 based chips that
857 divide SSE registers to two 64bit halves.
858 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
859 to allow register renaming on 128bit SSE units, but usually results in one
860 extra microop on 64bit SSE units. Experimental results shows that disabling
861 this option on P4 brings over 20% SPECfp regression, while enabling it on
862 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
864 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
865 /* Set for machines where the type and dependencies are resolved on SSE
866 register parts instead of whole registers, so we may maintain just
867 lower part of scalar values in proper format leaving the upper part
869 const int x86_sse_split_regs = m_ATHLON_K8;
870 const int x86_sse_typeless_stores = m_ATHLON_K8;
871 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
872 const int x86_use_ffreep = m_ATHLON_K8;
873 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
874 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
876 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
877 integer data in xmm registers. Which results in pretty abysmal code. */
878 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
880 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
881 /* Some CPU cores are not able to predict more than 4 branch instructions in
882 the 16 byte window. */
883 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
884 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_GENERIC;
885 const int x86_use_bt = m_ATHLON_K8;
886 /* Compare and exchange was added for 80486. */
887 const int x86_cmpxchg = ~m_386;
888 /* Compare and exchange 8 bytes was added for pentium. */
889 const int x86_cmpxchg8b = ~(m_386 | m_486);
890 /* Compare and exchange 16 bytes was added for nocona. */
891 const int x86_cmpxchg16b = m_NOCONA;
892 /* Exchange and add was added for 80486. */
893 const int x86_xadd = ~m_386;
894 /* Byteswap was added for 80486. */
895 const int x86_bswap = ~m_386;
896 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
898 /* In case the average insn count for single function invocation is
899 lower than this constant, emit fast (but longer) prologue and
901 #define FAST_PROLOGUE_INSN_COUNT 20
903 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
904 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
905 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
906 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
908 /* Array of the smallest class containing reg number REGNO, indexed by
909 REGNO. Used by REGNO_REG_CLASS in i386.h. */
911 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
914 AREG, DREG, CREG, BREG,
916 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
918 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
919 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
922 /* flags, fpsr, fpcr, dirflag, frame */
923 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
924 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
926 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
928 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
929 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
930 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
934 /* The "default" register map used in 32bit mode. */
936 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
938 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
939 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
940 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
941 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
942 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
943 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
944 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
947 static int const x86_64_int_parameter_registers[6] =
949 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
950 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
953 static int const x86_64_int_return_registers[4] =
955 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
958 /* The "default" register map used in 64bit mode. */
959 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
961 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
962 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
963 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
964 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
965 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
966 8,9,10,11,12,13,14,15, /* extended integer registers */
967 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
970 /* Define the register numbers to be used in Dwarf debugging information.
971 The SVR4 reference port C compiler uses the following register numbers
972 in its Dwarf output code:
973 0 for %eax (gcc regno = 0)
974 1 for %ecx (gcc regno = 2)
975 2 for %edx (gcc regno = 1)
976 3 for %ebx (gcc regno = 3)
977 4 for %esp (gcc regno = 7)
978 5 for %ebp (gcc regno = 6)
979 6 for %esi (gcc regno = 4)
980 7 for %edi (gcc regno = 5)
981 The following three DWARF register numbers are never generated by
982 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
983 believes these numbers have these meanings.
984 8 for %eip (no gcc equivalent)
985 9 for %eflags (gcc regno = 17)
986 10 for %trapno (no gcc equivalent)
987 It is not at all clear how we should number the FP stack registers
988 for the x86 architecture. If the version of SDB on x86/svr4 were
989 a bit less brain dead with respect to floating-point then we would
990 have a precedent to follow with respect to DWARF register numbers
991 for x86 FP registers, but the SDB on x86/svr4 is so completely
992 broken with respect to FP registers that it is hardly worth thinking
993 of it as something to strive for compatibility with.
994 The version of x86/svr4 SDB I have at the moment does (partially)
995 seem to believe that DWARF register number 11 is associated with
996 the x86 register %st(0), but that's about all. Higher DWARF
997 register numbers don't seem to be associated with anything in
998 particular, and even for DWARF regno 11, SDB only seems to under-
999 stand that it should say that a variable lives in %st(0) (when
1000 asked via an `=' command) if we said it was in DWARF regno 11,
1001 but SDB still prints garbage when asked for the value of the
1002 variable in question (via a `/' command).
1003 (Also note that the labels SDB prints for various FP stack regs
1004 when doing an `x' command are all wrong.)
1005 Note that these problems generally don't affect the native SVR4
1006 C compiler because it doesn't allow the use of -O with -g and
1007 because when it is *not* optimizing, it allocates a memory
1008 location for each floating-point variable, and the memory
1009 location is what gets described in the DWARF AT_location
1010 attribute for the variable in question.
1011 Regardless of the severe mental illness of the x86/svr4 SDB, we
1012 do something sensible here and we use the following DWARF
1013 register numbers. Note that these are all stack-top-relative
1015 11 for %st(0) (gcc regno = 8)
1016 12 for %st(1) (gcc regno = 9)
1017 13 for %st(2) (gcc regno = 10)
1018 14 for %st(3) (gcc regno = 11)
1019 15 for %st(4) (gcc regno = 12)
1020 16 for %st(5) (gcc regno = 13)
1021 17 for %st(6) (gcc regno = 14)
1022 18 for %st(7) (gcc regno = 15)
1024 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1026 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1027 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1028 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1029 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1030 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1031 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1032 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1035 /* Test and compare insns in i386.md store the information needed to
1036 generate branch and scc insns here. */
1038 rtx ix86_compare_op0 = NULL_RTX;
1039 rtx ix86_compare_op1 = NULL_RTX;
1040 rtx ix86_compare_emitted = NULL_RTX;
1042 /* Size of the register save area. */
1043 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1045 /* Define the structure for the machine field in struct function. */
1047 struct stack_local_entry GTY(())
1049 unsigned short mode;
1052 struct stack_local_entry *next;
1055 /* Structure describing stack frame layout.
1056 Stack grows downward:
1062 saved frame pointer if frame_pointer_needed
1063 <- HARD_FRAME_POINTER
1068 [va_arg registers] (
1069 > to_allocate <- FRAME_POINTER
1079 HOST_WIDE_INT frame;
1081 int outgoing_arguments_size;
1084 HOST_WIDE_INT to_allocate;
1085 /* The offsets relative to ARG_POINTER. */
1086 HOST_WIDE_INT frame_pointer_offset;
1087 HOST_WIDE_INT hard_frame_pointer_offset;
1088 HOST_WIDE_INT stack_pointer_offset;
1090 /* When save_regs_using_mov is set, emit prologue using
1091 move instead of push instructions. */
1092 bool save_regs_using_mov;
1095 /* Code model option. */
1096 enum cmodel ix86_cmodel;
1098 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1100 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1102 /* Which unit we are generating floating point math for. */
1103 enum fpmath_unit ix86_fpmath;
1105 /* Which cpu are we scheduling for. */
1106 enum processor_type ix86_tune;
1107 /* Which instruction set architecture to use. */
1108 enum processor_type ix86_arch;
1110 /* true if sse prefetch instruction is not NOOP. */
1111 int x86_prefetch_sse;
1113 /* ix86_regparm_string as a number */
1114 static int ix86_regparm;
1116 /* -mstackrealign option */
1117 extern int ix86_force_align_arg_pointer;
1118 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1120 /* Preferred alignment for stack boundary in bits. */
1121 unsigned int ix86_preferred_stack_boundary;
1123 /* Values 1-5: see jump.c */
1124 int ix86_branch_cost;
1126 /* Variables which are this size or smaller are put in the data/bss
1127 or ldata/lbss sections. */
1129 int ix86_section_threshold = 65536;
1131 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1132 char internal_label_prefix[16];
1133 int internal_label_prefix_len;
1135 static bool ix86_handle_option (size_t, const char *, int);
1136 static void output_pic_addr_const (FILE *, rtx, int);
1137 static void put_condition_code (enum rtx_code, enum machine_mode,
1139 static const char *get_some_local_dynamic_name (void);
1140 static int get_some_local_dynamic_name_1 (rtx *, void *);
1141 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1142 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1144 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1145 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1147 static rtx get_thread_pointer (int);
1148 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1149 static void get_pc_thunk_name (char [32], unsigned int);
1150 static rtx gen_push (rtx);
1151 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1152 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1153 static struct machine_function * ix86_init_machine_status (void);
1154 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1155 static int ix86_nsaved_regs (void);
1156 static void ix86_emit_save_regs (void);
1157 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1158 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1159 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1160 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1161 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1162 static rtx ix86_expand_aligntest (rtx, int);
1163 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1164 static int ix86_issue_rate (void);
1165 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1166 static int ia32_multipass_dfa_lookahead (void);
1167 static void ix86_init_mmx_sse_builtins (void);
1168 static rtx x86_this_parameter (tree);
1169 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1170 HOST_WIDE_INT, tree);
1171 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1172 static void x86_file_start (void);
1173 static void ix86_reorg (void);
1174 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1175 static tree ix86_build_builtin_va_list (void);
1176 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1178 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1179 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1180 static bool ix86_vector_mode_supported_p (enum machine_mode);
1182 static int ix86_address_cost (rtx);
1183 static bool ix86_cannot_force_const_mem (rtx);
1184 static rtx ix86_delegitimize_address (rtx);
1186 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1188 struct builtin_description;
1189 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1191 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1193 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1194 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1195 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1196 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1197 static rtx safe_vector_operand (rtx, enum machine_mode);
1198 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1199 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1200 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1201 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1202 static int ix86_fp_comparison_cost (enum rtx_code code);
1203 static unsigned int ix86_select_alt_pic_regnum (void);
1204 static int ix86_save_reg (unsigned int, int);
1205 static void ix86_compute_frame_layout (struct ix86_frame *);
1206 static int ix86_comp_type_attributes (tree, tree);
1207 static int ix86_function_regparm (tree, tree);
1208 const struct attribute_spec ix86_attribute_table[];
1209 static bool ix86_function_ok_for_sibcall (tree, tree);
1210 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1211 static int ix86_value_regno (enum machine_mode, tree, tree);
1212 static bool contains_128bit_aligned_vector_p (tree);
1213 static rtx ix86_struct_value_rtx (tree, int);
1214 static bool ix86_ms_bitfield_layout_p (tree);
1215 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1216 static int extended_reg_mentioned_1 (rtx *, void *);
1217 static bool ix86_rtx_costs (rtx, int, int, int *);
1218 static int min_insn_size (rtx);
1219 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1220 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1221 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1223 static void ix86_init_builtins (void);
1224 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1225 static const char *ix86_mangle_fundamental_type (tree);
1226 static tree ix86_stack_protect_fail (void);
1227 static rtx ix86_internal_arg_pointer (void);
1228 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1230 /* This function is only used on Solaris. */
1231 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1234 /* Register class used for passing given 64bit part of the argument.
1235 These represent classes as documented by the PS ABI, with the exception
1236 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1237 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1239 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1240 whenever possible (upper half does contain padding).
1242 enum x86_64_reg_class
1245 X86_64_INTEGER_CLASS,
1246 X86_64_INTEGERSI_CLASS,
1253 X86_64_COMPLEX_X87_CLASS,
1256 static const char * const x86_64_reg_class_name[] = {
1257 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1258 "sseup", "x87", "x87up", "cplx87", "no"
1261 #define MAX_CLASSES 4
1263 /* Table of constants used by fldpi, fldln2, etc.... */
1264 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1265 static bool ext_80387_constants_init = 0;
1266 static void init_ext_80387_constants (void);
1267 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1268 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1269 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1270 static section *x86_64_elf_select_section (tree decl, int reloc,
1271 unsigned HOST_WIDE_INT align)
1274 /* Initialize the GCC target structure. */
1275 #undef TARGET_ATTRIBUTE_TABLE
1276 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1277 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1278 # undef TARGET_MERGE_DECL_ATTRIBUTES
1279 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1282 #undef TARGET_COMP_TYPE_ATTRIBUTES
1283 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1285 #undef TARGET_INIT_BUILTINS
1286 #define TARGET_INIT_BUILTINS ix86_init_builtins
1287 #undef TARGET_EXPAND_BUILTIN
1288 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1290 #undef TARGET_ASM_FUNCTION_EPILOGUE
1291 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1293 #undef TARGET_ENCODE_SECTION_INFO
1294 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1295 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1297 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1300 #undef TARGET_ASM_OPEN_PAREN
1301 #define TARGET_ASM_OPEN_PAREN ""
1302 #undef TARGET_ASM_CLOSE_PAREN
1303 #define TARGET_ASM_CLOSE_PAREN ""
1305 #undef TARGET_ASM_ALIGNED_HI_OP
1306 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1307 #undef TARGET_ASM_ALIGNED_SI_OP
1308 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1310 #undef TARGET_ASM_ALIGNED_DI_OP
1311 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1314 #undef TARGET_ASM_UNALIGNED_HI_OP
1315 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1316 #undef TARGET_ASM_UNALIGNED_SI_OP
1317 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1318 #undef TARGET_ASM_UNALIGNED_DI_OP
1319 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1321 #undef TARGET_SCHED_ADJUST_COST
1322 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1323 #undef TARGET_SCHED_ISSUE_RATE
1324 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1325 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1326 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1327 ia32_multipass_dfa_lookahead
1329 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1330 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1333 #undef TARGET_HAVE_TLS
1334 #define TARGET_HAVE_TLS true
1336 #undef TARGET_CANNOT_FORCE_CONST_MEM
1337 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1338 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1339 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1341 #undef TARGET_DELEGITIMIZE_ADDRESS
1342 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1344 #undef TARGET_MS_BITFIELD_LAYOUT_P
1345 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1348 #undef TARGET_BINDS_LOCAL_P
1349 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1352 #undef TARGET_ASM_OUTPUT_MI_THUNK
1353 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1354 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1355 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1357 #undef TARGET_ASM_FILE_START
1358 #define TARGET_ASM_FILE_START x86_file_start
1360 #undef TARGET_DEFAULT_TARGET_FLAGS
1361 #define TARGET_DEFAULT_TARGET_FLAGS \
1363 | TARGET_64BIT_DEFAULT \
1364 | TARGET_SUBTARGET_DEFAULT \
1365 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1367 #undef TARGET_HANDLE_OPTION
1368 #define TARGET_HANDLE_OPTION ix86_handle_option
1370 #undef TARGET_RTX_COSTS
1371 #define TARGET_RTX_COSTS ix86_rtx_costs
1372 #undef TARGET_ADDRESS_COST
1373 #define TARGET_ADDRESS_COST ix86_address_cost
1375 #undef TARGET_FIXED_CONDITION_CODE_REGS
1376 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1377 #undef TARGET_CC_MODES_COMPATIBLE
1378 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1380 #undef TARGET_MACHINE_DEPENDENT_REORG
1381 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1383 #undef TARGET_BUILD_BUILTIN_VA_LIST
1384 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1386 #undef TARGET_MD_ASM_CLOBBERS
1387 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1389 #undef TARGET_PROMOTE_PROTOTYPES
1390 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1391 #undef TARGET_STRUCT_VALUE_RTX
1392 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1393 #undef TARGET_SETUP_INCOMING_VARARGS
1394 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1395 #undef TARGET_MUST_PASS_IN_STACK
1396 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1397 #undef TARGET_PASS_BY_REFERENCE
1398 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1399 #undef TARGET_INTERNAL_ARG_POINTER
1400 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1401 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1402 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1404 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1405 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1407 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1408 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1410 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1411 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1414 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1415 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1418 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1419 #undef TARGET_INSERT_ATTRIBUTES
1420 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1423 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1424 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1426 #undef TARGET_STACK_PROTECT_FAIL
1427 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1429 #undef TARGET_FUNCTION_VALUE
1430 #define TARGET_FUNCTION_VALUE ix86_function_value
1432 struct gcc_target targetm = TARGET_INITIALIZER;
1435 /* The svr4 ABI for the i386 says that records and unions are returned
1437 #ifndef DEFAULT_PCC_STRUCT_RETURN
1438 #define DEFAULT_PCC_STRUCT_RETURN 1
1441 /* Implement TARGET_HANDLE_OPTION. */
1444 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1451 target_flags &= ~MASK_3DNOW_A;
1452 target_flags_explicit |= MASK_3DNOW_A;
1459 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1460 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1467 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1468 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1475 target_flags &= ~MASK_SSE3;
1476 target_flags_explicit |= MASK_SSE3;
1485 /* Sometimes certain combinations of command options do not make
1486 sense on a particular target machine. You can define a macro
1487 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1488 defined, is executed once just after all the command options have
1491 Don't use this macro to turn on various extra optimizations for
1492 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1495 override_options (void)
1498 int ix86_tune_defaulted = 0;
1500 /* Comes from final.c -- no real reason to change it. */
1501 #define MAX_CODE_ALIGN 16
1505 const struct processor_costs *cost; /* Processor costs */
1506 const int target_enable; /* Target flags to enable. */
1507 const int target_disable; /* Target flags to disable. */
1508 const int align_loop; /* Default alignments. */
1509 const int align_loop_max_skip;
1510 const int align_jump;
1511 const int align_jump_max_skip;
1512 const int align_func;
1514 const processor_target_table[PROCESSOR_max] =
1516 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1517 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1518 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1519 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1520 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1521 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1522 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1523 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1524 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1525 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1526 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1527 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1530 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1533 const char *const name; /* processor name or nickname. */
1534 const enum processor_type processor;
1535 const enum pta_flags
1541 PTA_PREFETCH_SSE = 16,
1548 const processor_alias_table[] =
1550 {"i386", PROCESSOR_I386, 0},
1551 {"i486", PROCESSOR_I486, 0},
1552 {"i586", PROCESSOR_PENTIUM, 0},
1553 {"pentium", PROCESSOR_PENTIUM, 0},
1554 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1555 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1556 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1557 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1558 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1559 {"i686", PROCESSOR_PENTIUMPRO, 0},
1560 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1561 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1562 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1563 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1564 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1565 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1566 | PTA_MMX | PTA_PREFETCH_SSE},
1567 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1568 | PTA_MMX | PTA_PREFETCH_SSE},
1569 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1570 | PTA_MMX | PTA_PREFETCH_SSE},
1571 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1572 | PTA_MMX | PTA_PREFETCH_SSE},
1573 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1575 {"k6", PROCESSOR_K6, PTA_MMX},
1576 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1577 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1578 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1580 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1581 | PTA_3DNOW | PTA_3DNOW_A},
1582 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1583 | PTA_3DNOW_A | PTA_SSE},
1584 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1585 | PTA_3DNOW_A | PTA_SSE},
1586 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1587 | PTA_3DNOW_A | PTA_SSE},
1588 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1589 | PTA_SSE | PTA_SSE2 },
1590 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1591 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1592 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1593 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1594 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1595 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1596 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1597 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1598 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1599 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1602 int const pta_size = ARRAY_SIZE (processor_alias_table);
1604 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1605 SUBTARGET_OVERRIDE_OPTIONS;
1608 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1609 SUBSUBTARGET_OVERRIDE_OPTIONS;
1612 /* -fPIC is the default for x86_64. */
1613 if (TARGET_MACHO && TARGET_64BIT)
1616 /* Set the default values for switches whose default depends on TARGET_64BIT
1617 in case they weren't overwritten by command line options. */
1620 /* Mach-O doesn't support omitting the frame pointer for now. */
1621 if (flag_omit_frame_pointer == 2)
1622 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1623 if (flag_asynchronous_unwind_tables == 2)
1624 flag_asynchronous_unwind_tables = 1;
1625 if (flag_pcc_struct_return == 2)
1626 flag_pcc_struct_return = 0;
1630 if (flag_omit_frame_pointer == 2)
1631 flag_omit_frame_pointer = 0;
1632 if (flag_asynchronous_unwind_tables == 2)
1633 flag_asynchronous_unwind_tables = 0;
1634 if (flag_pcc_struct_return == 2)
1635 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1638 /* Need to check -mtune=generic first. */
1639 if (ix86_tune_string)
1641 if (!strcmp (ix86_tune_string, "generic")
1642 || !strcmp (ix86_tune_string, "i686")
1643 /* As special support for cross compilers we read -mtune=native
1644 as -mtune=generic. With native compilers we won't see the
1645 -mtune=native, as it was changed by the driver. */
1646 || !strcmp (ix86_tune_string, "native"))
1649 ix86_tune_string = "generic64";
1651 ix86_tune_string = "generic32";
1653 else if (!strncmp (ix86_tune_string, "generic", 7))
1654 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1658 if (ix86_arch_string)
1659 ix86_tune_string = ix86_arch_string;
1660 if (!ix86_tune_string)
1662 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1663 ix86_tune_defaulted = 1;
1666 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1667 need to use a sensible tune option. */
1668 if (!strcmp (ix86_tune_string, "generic")
1669 || !strcmp (ix86_tune_string, "x86-64")
1670 || !strcmp (ix86_tune_string, "i686"))
1673 ix86_tune_string = "generic64";
1675 ix86_tune_string = "generic32";
1678 if (!strcmp (ix86_tune_string, "x86-64"))
1679 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1680 "-mtune=generic instead as appropriate.");
1682 if (!ix86_arch_string)
1683 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1684 if (!strcmp (ix86_arch_string, "generic"))
1685 error ("generic CPU can be used only for -mtune= switch");
1686 if (!strncmp (ix86_arch_string, "generic", 7))
1687 error ("bad value (%s) for -march= switch", ix86_arch_string);
1689 if (ix86_cmodel_string != 0)
1691 if (!strcmp (ix86_cmodel_string, "small"))
1692 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1693 else if (!strcmp (ix86_cmodel_string, "medium"))
1694 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1696 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1697 else if (!strcmp (ix86_cmodel_string, "32"))
1698 ix86_cmodel = CM_32;
1699 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1700 ix86_cmodel = CM_KERNEL;
1701 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1702 ix86_cmodel = CM_LARGE;
1704 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1708 ix86_cmodel = CM_32;
1710 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1712 if (ix86_asm_string != 0)
1715 && !strcmp (ix86_asm_string, "intel"))
1716 ix86_asm_dialect = ASM_INTEL;
1717 else if (!strcmp (ix86_asm_string, "att"))
1718 ix86_asm_dialect = ASM_ATT;
1720 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1722 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1723 error ("code model %qs not supported in the %s bit mode",
1724 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1725 if (ix86_cmodel == CM_LARGE)
1726 sorry ("code model %<large%> not supported yet");
1727 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1728 sorry ("%i-bit mode not compiled in",
1729 (target_flags & MASK_64BIT) ? 64 : 32);
1731 for (i = 0; i < pta_size; i++)
1732 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1734 ix86_arch = processor_alias_table[i].processor;
1735 /* Default cpu tuning to the architecture. */
1736 ix86_tune = ix86_arch;
1737 if (processor_alias_table[i].flags & PTA_MMX
1738 && !(target_flags_explicit & MASK_MMX))
1739 target_flags |= MASK_MMX;
1740 if (processor_alias_table[i].flags & PTA_3DNOW
1741 && !(target_flags_explicit & MASK_3DNOW))
1742 target_flags |= MASK_3DNOW;
1743 if (processor_alias_table[i].flags & PTA_3DNOW_A
1744 && !(target_flags_explicit & MASK_3DNOW_A))
1745 target_flags |= MASK_3DNOW_A;
1746 if (processor_alias_table[i].flags & PTA_SSE
1747 && !(target_flags_explicit & MASK_SSE))
1748 target_flags |= MASK_SSE;
1749 if (processor_alias_table[i].flags & PTA_SSE2
1750 && !(target_flags_explicit & MASK_SSE2))
1751 target_flags |= MASK_SSE2;
1752 if (processor_alias_table[i].flags & PTA_SSE3
1753 && !(target_flags_explicit & MASK_SSE3))
1754 target_flags |= MASK_SSE3;
1755 if (processor_alias_table[i].flags & PTA_SSSE3
1756 && !(target_flags_explicit & MASK_SSSE3))
1757 target_flags |= MASK_SSSE3;
1758 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1759 x86_prefetch_sse = true;
1760 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1761 error ("CPU you selected does not support x86-64 "
1767 error ("bad value (%s) for -march= switch", ix86_arch_string);
1769 for (i = 0; i < pta_size; i++)
1770 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1772 ix86_tune = processor_alias_table[i].processor;
1773 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1775 if (ix86_tune_defaulted)
1777 ix86_tune_string = "x86-64";
1778 for (i = 0; i < pta_size; i++)
1779 if (! strcmp (ix86_tune_string,
1780 processor_alias_table[i].name))
1782 ix86_tune = processor_alias_table[i].processor;
1785 error ("CPU you selected does not support x86-64 "
1788 /* Intel CPUs have always interpreted SSE prefetch instructions as
1789 NOPs; so, we can enable SSE prefetch instructions even when
1790 -mtune (rather than -march) points us to a processor that has them.
1791 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1792 higher processors. */
1793 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1794 x86_prefetch_sse = true;
1798 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1801 ix86_cost = &size_cost;
1803 ix86_cost = processor_target_table[ix86_tune].cost;
1804 target_flags |= processor_target_table[ix86_tune].target_enable;
1805 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1807 /* Arrange to set up i386_stack_locals for all functions. */
1808 init_machine_status = ix86_init_machine_status;
1810 /* Validate -mregparm= value. */
1811 if (ix86_regparm_string)
1813 i = atoi (ix86_regparm_string);
1814 if (i < 0 || i > REGPARM_MAX)
1815 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1821 ix86_regparm = REGPARM_MAX;
1823 /* If the user has provided any of the -malign-* options,
1824 warn and use that value only if -falign-* is not set.
1825 Remove this code in GCC 3.2 or later. */
1826 if (ix86_align_loops_string)
1828 warning (0, "-malign-loops is obsolete, use -falign-loops");
1829 if (align_loops == 0)
1831 i = atoi (ix86_align_loops_string);
1832 if (i < 0 || i > MAX_CODE_ALIGN)
1833 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1835 align_loops = 1 << i;
1839 if (ix86_align_jumps_string)
1841 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1842 if (align_jumps == 0)
1844 i = atoi (ix86_align_jumps_string);
1845 if (i < 0 || i > MAX_CODE_ALIGN)
1846 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1848 align_jumps = 1 << i;
1852 if (ix86_align_funcs_string)
1854 warning (0, "-malign-functions is obsolete, use -falign-functions");
1855 if (align_functions == 0)
1857 i = atoi (ix86_align_funcs_string);
1858 if (i < 0 || i > MAX_CODE_ALIGN)
1859 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1861 align_functions = 1 << i;
1865 /* Default align_* from the processor table. */
1866 if (align_loops == 0)
1868 align_loops = processor_target_table[ix86_tune].align_loop;
1869 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1871 if (align_jumps == 0)
1873 align_jumps = processor_target_table[ix86_tune].align_jump;
1874 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1876 if (align_functions == 0)
1878 align_functions = processor_target_table[ix86_tune].align_func;
1881 /* Validate -mbranch-cost= value, or provide default. */
1882 ix86_branch_cost = ix86_cost->branch_cost;
1883 if (ix86_branch_cost_string)
1885 i = atoi (ix86_branch_cost_string);
1887 error ("-mbranch-cost=%d is not between 0 and 5", i);
1889 ix86_branch_cost = i;
1891 if (ix86_section_threshold_string)
1893 i = atoi (ix86_section_threshold_string);
1895 error ("-mlarge-data-threshold=%d is negative", i);
1897 ix86_section_threshold = i;
1900 if (ix86_tls_dialect_string)
1902 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1903 ix86_tls_dialect = TLS_DIALECT_GNU;
1904 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1905 ix86_tls_dialect = TLS_DIALECT_GNU2;
1906 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1907 ix86_tls_dialect = TLS_DIALECT_SUN;
1909 error ("bad value (%s) for -mtls-dialect= switch",
1910 ix86_tls_dialect_string);
1913 /* Keep nonleaf frame pointers. */
1914 if (flag_omit_frame_pointer)
1915 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1916 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1917 flag_omit_frame_pointer = 1;
1919 /* If we're doing fast math, we don't care about comparison order
1920 wrt NaNs. This lets us use a shorter comparison sequence. */
1921 if (flag_finite_math_only)
1922 target_flags &= ~MASK_IEEE_FP;
1924 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1925 since the insns won't need emulation. */
1926 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1927 target_flags &= ~MASK_NO_FANCY_MATH_387;
1929 /* Likewise, if the target doesn't have a 387, or we've specified
1930 software floating point, don't use 387 inline intrinsics. */
1932 target_flags |= MASK_NO_FANCY_MATH_387;
1934 /* Turn on SSE3 builtins for -mssse3. */
1936 target_flags |= MASK_SSE3;
1938 /* Turn on SSE2 builtins for -msse3. */
1940 target_flags |= MASK_SSE2;
1942 /* Turn on SSE builtins for -msse2. */
1944 target_flags |= MASK_SSE;
1946 /* Turn on MMX builtins for -msse. */
1949 target_flags |= MASK_MMX & ~target_flags_explicit;
1950 x86_prefetch_sse = true;
1953 /* Turn on MMX builtins for 3Dnow. */
1955 target_flags |= MASK_MMX;
1959 if (TARGET_ALIGN_DOUBLE)
1960 error ("-malign-double makes no sense in the 64bit mode");
1962 error ("-mrtd calling convention not supported in the 64bit mode");
1964 /* Enable by default the SSE and MMX builtins. Do allow the user to
1965 explicitly disable any of these. In particular, disabling SSE and
1966 MMX for kernel code is extremely useful. */
1968 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1969 & ~target_flags_explicit);
1973 /* i386 ABI does not specify red zone. It still makes sense to use it
1974 when programmer takes care to stack from being destroyed. */
1975 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1976 target_flags |= MASK_NO_RED_ZONE;
1979 /* Validate -mpreferred-stack-boundary= value, or provide default.
1980 The default of 128 bits is for Pentium III's SSE __m128. We can't
1981 change it because of optimize_size. Otherwise, we can't mix object
1982 files compiled with -Os and -On. */
1983 ix86_preferred_stack_boundary = 128;
1984 if (ix86_preferred_stack_boundary_string)
1986 i = atoi (ix86_preferred_stack_boundary_string);
1987 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1988 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1989 TARGET_64BIT ? 4 : 2);
1991 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1994 /* Accept -mx87regparm only if 80387 support is enabled. */
1995 if (TARGET_X87REGPARM
1997 error ("-mx87regparm used without 80387 enabled");
1999 /* Accept -msseregparm only if at least SSE support is enabled. */
2000 if (TARGET_SSEREGPARM
2002 error ("-msseregparm used without SSE enabled");
2004 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2006 if (ix86_fpmath_string != 0)
2008 if (! strcmp (ix86_fpmath_string, "387"))
2009 ix86_fpmath = FPMATH_387;
2010 else if (! strcmp (ix86_fpmath_string, "sse"))
2014 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2015 ix86_fpmath = FPMATH_387;
2018 ix86_fpmath = FPMATH_SSE;
2020 else if (! strcmp (ix86_fpmath_string, "387,sse")
2021 || ! strcmp (ix86_fpmath_string, "sse,387"))
2025 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2026 ix86_fpmath = FPMATH_387;
2028 else if (!TARGET_80387)
2030 warning (0, "387 instruction set disabled, using SSE arithmetics");
2031 ix86_fpmath = FPMATH_SSE;
2034 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2037 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2040 /* If the i387 is disabled, then do not return values in it. */
2042 target_flags &= ~MASK_FLOAT_RETURNS;
2044 if ((x86_accumulate_outgoing_args & TUNEMASK)
2045 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2047 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2049 /* ??? Unwind info is not correct around the CFG unless either a frame
2050 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2051 unwind info generation to be aware of the CFG and propagating states
2053 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2054 || flag_exceptions || flag_non_call_exceptions)
2055 && flag_omit_frame_pointer
2056 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2058 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2059 warning (0, "unwind tables currently require either a frame pointer "
2060 "or -maccumulate-outgoing-args for correctness");
2061 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2064 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2067 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2068 p = strchr (internal_label_prefix, 'X');
2069 internal_label_prefix_len = p - internal_label_prefix;
2073 /* When scheduling description is not available, disable scheduler pass
2074 so it won't slow down the compilation and make x87 code slower. */
2075 if (!TARGET_SCHEDULE)
2076 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2078 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2079 set_param_value ("simultaneous-prefetches",
2080 ix86_cost->simultaneous_prefetches);
2081 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2082 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2085 /* switch to the appropriate section for output of DECL.
2086 DECL is either a `VAR_DECL' node or a constant of some sort.
2087 RELOC indicates whether forming the initial value of DECL requires
2088 link-time relocations. */
2091 x86_64_elf_select_section (tree decl, int reloc,
2092 unsigned HOST_WIDE_INT align)
2094 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2095 && ix86_in_large_data_p (decl))
2097 const char *sname = NULL;
2098 unsigned int flags = SECTION_WRITE;
2099 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2104 case SECCAT_DATA_REL:
2105 sname = ".ldata.rel";
2107 case SECCAT_DATA_REL_LOCAL:
2108 sname = ".ldata.rel.local";
2110 case SECCAT_DATA_REL_RO:
2111 sname = ".ldata.rel.ro";
2113 case SECCAT_DATA_REL_RO_LOCAL:
2114 sname = ".ldata.rel.ro.local";
2118 flags |= SECTION_BSS;
2121 case SECCAT_RODATA_MERGE_STR:
2122 case SECCAT_RODATA_MERGE_STR_INIT:
2123 case SECCAT_RODATA_MERGE_CONST:
2127 case SECCAT_SRODATA:
2134 /* We don't split these for medium model. Place them into
2135 default sections and hope for best. */
2140 /* We might get called with string constants, but get_named_section
2141 doesn't like them as they are not DECLs. Also, we need to set
2142 flags in that case. */
2144 return get_section (sname, flags, NULL);
2145 return get_named_section (decl, sname, reloc);
2148 return default_elf_select_section (decl, reloc, align);
2151 /* Build up a unique section name, expressed as a
2152 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2153 RELOC indicates whether the initial value of EXP requires
2154 link-time relocations. */
2157 x86_64_elf_unique_section (tree decl, int reloc)
2159 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2160 && ix86_in_large_data_p (decl))
2162 const char *prefix = NULL;
2163 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2164 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2166 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2169 case SECCAT_DATA_REL:
2170 case SECCAT_DATA_REL_LOCAL:
2171 case SECCAT_DATA_REL_RO:
2172 case SECCAT_DATA_REL_RO_LOCAL:
2173 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2176 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2179 case SECCAT_RODATA_MERGE_STR:
2180 case SECCAT_RODATA_MERGE_STR_INIT:
2181 case SECCAT_RODATA_MERGE_CONST:
2182 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2184 case SECCAT_SRODATA:
2191 /* We don't split these for medium model. Place them into
2192 default sections and hope for best. */
2200 plen = strlen (prefix);
2202 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2203 name = targetm.strip_name_encoding (name);
2204 nlen = strlen (name);
2206 string = alloca (nlen + plen + 1);
2207 memcpy (string, prefix, plen);
2208 memcpy (string + plen, name, nlen + 1);
2210 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2214 default_unique_section (decl, reloc);
2217 #ifdef COMMON_ASM_OP
2218 /* This says how to output assembler code to declare an
2219 uninitialized external linkage data object.
2221 For medium model x86-64 we need to use .largecomm opcode for
2224 x86_elf_aligned_common (FILE *file,
2225 const char *name, unsigned HOST_WIDE_INT size,
2228 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2229 && size > (unsigned int)ix86_section_threshold)
2230 fprintf (file, ".largecomm\t");
2232 fprintf (file, "%s", COMMON_ASM_OP);
2233 assemble_name (file, name);
2234 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2235 size, align / BITS_PER_UNIT);
2238 /* Utility function for targets to use in implementing
2239 ASM_OUTPUT_ALIGNED_BSS. */
2242 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2243 const char *name, unsigned HOST_WIDE_INT size,
2246 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2247 && size > (unsigned int)ix86_section_threshold)
2248 switch_to_section (get_named_section (decl, ".lbss", 0));
2250 switch_to_section (bss_section);
2251 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2252 #ifdef ASM_DECLARE_OBJECT_NAME
2253 last_assemble_variable_decl = decl;
2254 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2256 /* Standard thing is just output label for the object. */
2257 ASM_OUTPUT_LABEL (file, name);
2258 #endif /* ASM_DECLARE_OBJECT_NAME */
2259 ASM_OUTPUT_SKIP (file, size ? size : 1);
2264 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2266 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2267 make the problem with not enough registers even worse. */
2268 #ifdef INSN_SCHEDULING
2270 flag_schedule_insns = 0;
2274 /* The Darwin libraries never set errno, so we might as well
2275 avoid calling them when that's the only reason we would. */
2276 flag_errno_math = 0;
2278 /* The default values of these switches depend on the TARGET_64BIT
2279 that is not known at this moment. Mark these values with 2 and
2280 let user the to override these. In case there is no command line option
2281 specifying them, we will set the defaults in override_options. */
2283 flag_omit_frame_pointer = 2;
2284 flag_pcc_struct_return = 2;
2285 flag_asynchronous_unwind_tables = 2;
2286 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2287 SUBTARGET_OPTIMIZATION_OPTIONS;
2291 /* Table of valid machine attributes. */
2292 const struct attribute_spec ix86_attribute_table[] =
2294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2295 /* Stdcall attribute says callee is responsible for popping arguments
2296 if they are not variable. */
2297 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2298 /* Fastcall attribute says callee is responsible for popping arguments
2299 if they are not variable. */
2300 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2301 /* Cdecl attribute says the callee is a normal C declaration */
2302 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2303 /* Regparm attribute specifies how many integer arguments are to be
2304 passed in registers. */
2305 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2306 /* X87regparm attribute says we are passing floating point arguments
2307 in 80387 registers. */
2308 { "x87regparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2309 /* Sseregparm attribute says we are using x86_64 calling conventions
2310 for FP arguments. */
2311 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2312 /* force_align_arg_pointer says this function realigns the stack at entry. */
2313 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2314 false, true, true, ix86_handle_cconv_attribute },
2315 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2316 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2317 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2318 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2320 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2321 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2322 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2323 SUBTARGET_ATTRIBUTE_TABLE,
2325 { NULL, 0, 0, false, false, false, NULL }
2328 /* Decide whether we can make a sibling call to a function. DECL is the
2329 declaration of the function being targeted by the call and EXP is the
2330 CALL_EXPR representing the call. */
2333 ix86_function_ok_for_sibcall (tree decl, tree exp)
2338 /* If we are generating position-independent code, we cannot sibcall
2339 optimize any indirect call, or a direct call to a global function,
2340 as the PLT requires %ebx be live. */
2341 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2348 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2349 if (POINTER_TYPE_P (func))
2350 func = TREE_TYPE (func);
2353 /* Check that the return value locations are the same. Like
2354 if we are returning floats on the 80387 register stack, we cannot
2355 make a sibcall from a function that doesn't return a float to a
2356 function that does or, conversely, from a function that does return
2357 a float to a function that doesn't; the necessary stack adjustment
2358 would not be executed. This is also the place we notice
2359 differences in the return value ABI. Note that it is ok for one
2360 of the functions to have void return type as long as the return
2361 value of the other is passed in a register. */
2362 a = ix86_function_value (TREE_TYPE (exp), func, false);
2363 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2365 if (STACK_REG_P (a) || STACK_REG_P (b))
2367 if (!rtx_equal_p (a, b))
2370 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2372 else if (!rtx_equal_p (a, b))
2375 /* If this call is indirect, we'll need to be able to use a call-clobbered
2376 register for the address of the target function. Make sure that all
2377 such registers are not used for passing parameters. */
2378 if (!decl && !TARGET_64BIT)
2382 /* We're looking at the CALL_EXPR, we need the type of the function. */
2383 type = TREE_OPERAND (exp, 0); /* pointer expression */
2384 type = TREE_TYPE (type); /* pointer type */
2385 type = TREE_TYPE (type); /* function type */
2387 if (ix86_function_regparm (type, NULL) >= 3)
2389 /* ??? Need to count the actual number of registers to be used,
2390 not the possible number of registers. Fix later. */
2395 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2396 /* Dllimport'd functions are also called indirectly. */
2397 if (decl && DECL_DLLIMPORT_P (decl)
2398 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2402 /* If we forced aligned the stack, then sibcalling would unalign the
2403 stack, which may break the called function. */
2404 if (cfun->machine->force_align_arg_pointer)
2407 /* Otherwise okay. That also includes certain types of indirect calls. */
2411 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "x87regparm"
2412 and "sseregparm" calling convention attributes;
2413 arguments as in struct attribute_spec.handler. */
2416 ix86_handle_cconv_attribute (tree *node, tree name,
2418 int flags ATTRIBUTE_UNUSED,
2421 if (TREE_CODE (*node) != FUNCTION_TYPE
2422 && TREE_CODE (*node) != METHOD_TYPE
2423 && TREE_CODE (*node) != FIELD_DECL
2424 && TREE_CODE (*node) != TYPE_DECL)
2426 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2427 IDENTIFIER_POINTER (name));
2428 *no_add_attrs = true;
2432 /* Can combine regparm with all attributes but fastcall. */
2433 if (is_attribute_p ("regparm", name))
2437 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2439 error ("fastcall and regparm attributes are not compatible");
2442 cst = TREE_VALUE (args);
2443 if (TREE_CODE (cst) != INTEGER_CST)
2445 warning (OPT_Wattributes,
2446 "%qs attribute requires an integer constant argument",
2447 IDENTIFIER_POINTER (name));
2448 *no_add_attrs = true;
2450 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2452 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2453 IDENTIFIER_POINTER (name), REGPARM_MAX);
2454 *no_add_attrs = true;
2458 && lookup_attribute (ix86_force_align_arg_pointer_string,
2459 TYPE_ATTRIBUTES (*node))
2460 && compare_tree_int (cst, REGPARM_MAX-1))
2462 error ("%s functions limited to %d register parameters",
2463 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2471 warning (OPT_Wattributes, "%qs attribute ignored",
2472 IDENTIFIER_POINTER (name));
2473 *no_add_attrs = true;
2477 /* Can combine fastcall with stdcall (redundant), x87regparm
2479 if (is_attribute_p ("fastcall", name))
2481 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2483 error ("fastcall and cdecl attributes are not compatible");
2485 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2487 error ("fastcall and stdcall attributes are not compatible");
2489 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2491 error ("fastcall and regparm attributes are not compatible");
2495 /* Can combine stdcall with fastcall (redundant), regparm,
2496 x87regparm and sseregparm. */
2497 else if (is_attribute_p ("stdcall", name))
2499 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2501 error ("stdcall and cdecl attributes are not compatible");
2503 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2505 error ("stdcall and fastcall attributes are not compatible");
2509 /* Can combine cdecl with regparm, x87regparm and sseregparm. */
2510 else if (is_attribute_p ("cdecl", name))
2512 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2514 error ("stdcall and cdecl attributes are not compatible");
2516 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2518 error ("fastcall and cdecl attributes are not compatible");
2522 /* Can combine x87regparm or sseregparm with all attributes. */
2527 /* Return 0 if the attributes for two types are incompatible, 1 if they
2528 are compatible, and 2 if they are nearly compatible (which causes a
2529 warning to be generated). */
2532 ix86_comp_type_attributes (tree type1, tree type2)
2534 /* Check for mismatch of non-default calling convention. */
2535 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2537 if (TREE_CODE (type1) != FUNCTION_TYPE)
2540 /* Check for mismatched fastcall/regparm types. */
2541 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2542 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2543 || (ix86_function_regparm (type1, NULL)
2544 != ix86_function_regparm (type2, NULL)))
2547 /* Check for mismatched x87regparm types. */
2548 if (!lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type1))
2549 != !lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type2)))
2552 /* Check for mismatched sseregparm types. */
2553 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2554 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2557 /* Check for mismatched return types (cdecl vs stdcall). */
2558 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2559 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2565 /* Return the regparm value for a function with the indicated TYPE and DECL.
2566 DECL may be NULL when calling function indirectly
2567 or considering a libcall. */
2570 ix86_function_regparm (tree type, tree decl)
2573 int regparm = ix86_regparm;
2574 bool user_convention = false;
2578 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2581 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2582 user_convention = true;
2585 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2588 user_convention = true;
2591 /* Use register calling convention for local functions when possible. */
2592 if (!TARGET_64BIT && !user_convention && decl
2593 && flag_unit_at_a_time && !profile_flag)
2595 struct cgraph_local_info *i = cgraph_local_info (decl);
2598 int local_regparm, globals = 0, regno;
2600 /* Make sure no regparm register is taken by a global register
2602 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2603 if (global_regs[local_regparm])
2605 /* We can't use regparm(3) for nested functions as these use
2606 static chain pointer in third argument. */
2607 if (local_regparm == 3
2608 && decl_function_context (decl)
2609 && !DECL_NO_STATIC_CHAIN (decl))
2611 /* If the function realigns its stackpointer, the
2612 prologue will clobber %ecx. If we've already
2613 generated code for the callee, the callee
2614 DECL_STRUCT_FUNCTION is gone, so we fall back to
2615 scanning the attributes for the self-realigning
2617 if ((DECL_STRUCT_FUNCTION (decl)
2618 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2619 || (!DECL_STRUCT_FUNCTION (decl)
2620 && lookup_attribute (ix86_force_align_arg_pointer_string,
2621 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2623 /* Each global register variable increases register preassure,
2624 so the more global reg vars there are, the smaller regparm
2625 optimization use, unless requested by the user explicitly. */
2626 for (regno = 0; regno < 6; regno++)
2627 if (global_regs[regno])
2630 = globals < local_regparm ? local_regparm - globals : 0;
2632 if (local_regparm > regparm)
2633 regparm = local_regparm;
2640 /* Return 1 if we can pass up to X87_REGPARM_MAX floating point
2641 arguments in x87 registers for a function with the indicated
2642 TYPE and DECL. DECL may be NULL when calling function indirectly
2643 or considering a libcall. For local functions, return 2.
2644 Otherwise return 0. */
2647 ix86_function_x87regparm (tree type, tree decl)
2649 /* Use x87 registers to pass floating point arguments if requested
2650 by the x87regparm attribute. */
2651 if (TARGET_X87REGPARM
2653 && lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type))))
2658 error ("Calling %qD with attribute x87regparm without "
2659 "80387 enabled", decl);
2661 error ("Calling %qT with attribute x87regparm without "
2662 "80387 enabled", type);
2669 /* For local functions, pass up to X87_REGPARM_MAX floating point
2670 arguments in x87 registers. */
2671 if (!TARGET_64BIT && decl
2672 && flag_unit_at_a_time && !profile_flag)
2674 struct cgraph_local_info *i = cgraph_local_info (decl);
2682 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2683 in SSE registers for a function with the indicated TYPE and DECL.
2684 DECL may be NULL when calling function indirectly
2685 or considering a libcall. Otherwise return 0. */
2688 ix86_function_sseregparm (tree type, tree decl)
2690 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2691 by the sseregparm attribute. */
2692 if (TARGET_SSEREGPARM
2694 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2699 error ("Calling %qD with attribute sseregparm without "
2700 "SSE/SSE2 enabled", decl);
2702 error ("Calling %qT with attribute sseregparm without "
2703 "SSE/SSE2 enabled", type);
2710 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2711 in SSE registers even for 32-bit mode and not just 3, but up to
2712 8 SSE arguments in registers. */
2713 if (!TARGET_64BIT && decl
2714 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2716 struct cgraph_local_info *i = cgraph_local_info (decl);
2718 return TARGET_SSE2 ? 2 : 1;
2724 /* Return true if EAX is live at the start of the function. Used by
2725 ix86_expand_prologue to determine if we need special help before
2726 calling allocate_stack_worker. */
2729 ix86_eax_live_at_start_p (void)
2731 /* Cheat. Don't bother working forward from ix86_function_regparm
2732 to the function type to whether an actual argument is located in
2733 eax. Instead just look at cfg info, which is still close enough
2734 to correct at this point. This gives false positives for broken
2735 functions that might use uninitialized data that happens to be
2736 allocated in eax, but who cares? */
2737 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2740 /* Value is the number of bytes of arguments automatically
2741 popped when returning from a subroutine call.
2742 FUNDECL is the declaration node of the function (as a tree),
2743 FUNTYPE is the data type of the function (as a tree),
2744 or for a library call it is an identifier node for the subroutine name.
2745 SIZE is the number of bytes of arguments passed on the stack.
2747 On the 80386, the RTD insn may be used to pop them if the number
2748 of args is fixed, but if the number is variable then the caller
2749 must pop them all. RTD can't be used for library calls now
2750 because the library is compiled with the Unix compiler.
2751 Use of RTD is a selectable option, since it is incompatible with
2752 standard Unix calling sequences. If the option is not selected,
2753 the caller must always pop the args.
2755 The attribute stdcall is equivalent to RTD on a per module basis. */
2758 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2760 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2762 /* Cdecl functions override -mrtd, and never pop the stack. */
2763 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2765 /* Stdcall and fastcall functions will pop the stack if not
2767 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2768 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2772 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2773 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2774 == void_type_node)))
2778 /* Lose any fake structure return argument if it is passed on the stack. */
2779 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2781 && !KEEP_AGGREGATE_RETURN_POINTER)
2783 int nregs = ix86_function_regparm (funtype, fundecl);
2786 return GET_MODE_SIZE (Pmode);
2792 /* Argument support functions. */
2794 /* Return true when register may be used to pass function parameters. */
2796 ix86_function_arg_regno_p (int regno)
2800 return (regno < REGPARM_MAX
2801 || (TARGET_80387 && FP_REGNO_P (regno)
2802 && (regno < FIRST_FLOAT_REG + X87_REGPARM_MAX))
2803 || (TARGET_MMX && MMX_REGNO_P (regno)
2804 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2805 || (TARGET_SSE && SSE_REGNO_P (regno)
2806 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2808 if (TARGET_SSE && SSE_REGNO_P (regno)
2809 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2811 /* RAX is used as hidden argument to va_arg functions. */
2814 for (i = 0; i < REGPARM_MAX; i++)
2815 if (regno == x86_64_int_parameter_registers[i])
2820 /* Return if we do not know how to pass TYPE solely in registers. */
2823 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2825 if (must_pass_in_stack_var_size_or_pad (mode, type))
2828 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2829 The layout_type routine is crafty and tries to trick us into passing
2830 currently unsupported vector types on the stack by using TImode. */
2831 return (!TARGET_64BIT && mode == TImode
2832 && type && TREE_CODE (type) != VECTOR_TYPE);
2835 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2836 for a call to a function whose data type is FNTYPE.
2837 For a library call, FNTYPE is 0. */
2840 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2841 tree fntype, /* tree ptr for function decl */
2842 rtx libname, /* SYMBOL_REF of library name or 0 */
2845 static CUMULATIVE_ARGS zero_cum;
2846 tree param, next_param;
2848 if (TARGET_DEBUG_ARG)
2850 fprintf (stderr, "\ninit_cumulative_args (");
2852 fprintf (stderr, "fntype code = %s, ret code = %s",
2853 tree_code_name[(int) TREE_CODE (fntype)],
2854 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2856 fprintf (stderr, "no fntype");
2859 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2864 /* Set up the number of registers to use for passing arguments. */
2865 cum->nregs = ix86_regparm;
2867 cum->x87_nregs = X87_REGPARM_MAX;
2869 cum->sse_nregs = SSE_REGPARM_MAX;
2871 cum->mmx_nregs = MMX_REGPARM_MAX;
2872 cum->warn_sse = true;
2873 cum->warn_mmx = true;
2874 cum->maybe_vaarg = false;
2876 /* Use ecx and edx registers if function has fastcall attribute,
2877 else look for regparm information. */
2878 if (fntype && !TARGET_64BIT)
2880 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2886 cum->nregs = ix86_function_regparm (fntype, fndecl);
2889 /* Set up the number of 80387 registers used for passing
2890 floating point arguments. Warn for mismatching ABI. */
2891 cum->float_in_x87 = ix86_function_x87regparm (fntype, fndecl);
2893 /* Set up the number of SSE registers used for passing SFmode
2894 and DFmode arguments. Warn for mismatching ABI. */
2895 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2897 /* Determine if this function has variable arguments. This is
2898 indicated by the last argument being 'void_type_mode' if there
2899 are no variable arguments. If there are variable arguments, then
2900 we won't pass anything in registers in 32-bit mode. */
2902 if (cum->nregs || cum->mmx_nregs
2903 || cum->x87_nregs || cum->sse_nregs)
2905 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2906 param != 0; param = next_param)
2908 next_param = TREE_CHAIN (param);
2909 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2920 cum->float_in_x87 = 0;
2921 cum->float_in_sse = 0;
2923 cum->maybe_vaarg = true;
2927 if ((!fntype && !libname)
2928 || (fntype && !TYPE_ARG_TYPES (fntype)))
2929 cum->maybe_vaarg = true;
2931 if (TARGET_DEBUG_ARG)
2932 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2937 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2938 But in the case of vector types, it is some vector mode.
2940 When we have only some of our vector isa extensions enabled, then there
2941 are some modes for which vector_mode_supported_p is false. For these
2942 modes, the generic vector support in gcc will choose some non-vector mode
2943 in order to implement the type. By computing the natural mode, we'll
2944 select the proper ABI location for the operand and not depend on whatever
2945 the middle-end decides to do with these vector types. */
2947 static enum machine_mode
2948 type_natural_mode (tree type)
2950 enum machine_mode mode = TYPE_MODE (type);
2952 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2954 HOST_WIDE_INT size = int_size_in_bytes (type);
2955 if ((size == 8 || size == 16)
2956 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2957 && TYPE_VECTOR_SUBPARTS (type) > 1)
2959 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2961 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2962 mode = MIN_MODE_VECTOR_FLOAT;
2964 mode = MIN_MODE_VECTOR_INT;
2966 /* Get the mode which has this inner mode and number of units. */
2967 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2968 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2969 && GET_MODE_INNER (mode) == innermode)
2979 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2980 this may not agree with the mode that the type system has chosen for the
2981 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2982 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2985 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2990 if (orig_mode != BLKmode)
2991 tmp = gen_rtx_REG (orig_mode, regno);
2994 tmp = gen_rtx_REG (mode, regno);
2995 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2996 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3002 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3003 of this code is to classify each 8bytes of incoming argument by the register
3004 class and assign registers accordingly. */
3006 /* Return the union class of CLASS1 and CLASS2.
3007 See the x86-64 PS ABI for details. */
3009 static enum x86_64_reg_class
3010 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3012 /* Rule #1: If both classes are equal, this is the resulting class. */
3013 if (class1 == class2)
3016 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3018 if (class1 == X86_64_NO_CLASS)
3020 if (class2 == X86_64_NO_CLASS)
3023 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3024 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3025 return X86_64_MEMORY_CLASS;
3027 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3028 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3029 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3030 return X86_64_INTEGERSI_CLASS;
3031 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3032 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3033 return X86_64_INTEGER_CLASS;
3035 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3037 if (class1 == X86_64_X87_CLASS
3038 || class1 == X86_64_X87UP_CLASS
3039 || class1 == X86_64_COMPLEX_X87_CLASS
3040 || class2 == X86_64_X87_CLASS
3041 || class2 == X86_64_X87UP_CLASS
3042 || class2 == X86_64_COMPLEX_X87_CLASS)
3043 return X86_64_MEMORY_CLASS;
3045 /* Rule #6: Otherwise class SSE is used. */
3046 return X86_64_SSE_CLASS;
3049 /* Classify the argument of type TYPE and mode MODE.
3050 CLASSES will be filled by the register class used to pass each word
3051 of the operand. The number of words is returned. In case the parameter
3052 should be passed in memory, 0 is returned. As a special case for zero
3053 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3055 BIT_OFFSET is used internally for handling records and specifies offset
3056 of the offset in bits modulo 256 to avoid overflow cases.
3058 See the x86-64 PS ABI for details.
3062 classify_argument (enum machine_mode mode, tree type,
3063 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3065 HOST_WIDE_INT bytes =
3066 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3067 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3069 /* Variable sized entities are always passed/returned in memory. */
3073 if (mode != VOIDmode
3074 && targetm.calls.must_pass_in_stack (mode, type))
3077 if (type && AGGREGATE_TYPE_P (type))
3081 enum x86_64_reg_class subclasses[MAX_CLASSES];
3083 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3087 for (i = 0; i < words; i++)
3088 classes[i] = X86_64_NO_CLASS;
3090 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3091 signalize memory class, so handle it as special case. */
3094 classes[0] = X86_64_NO_CLASS;
3098 /* Classify each field of record and merge classes. */
3099 switch (TREE_CODE (type))
3102 /* And now merge the fields of structure. */
3103 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3105 if (TREE_CODE (field) == FIELD_DECL)
3109 if (TREE_TYPE (field) == error_mark_node)
3112 /* Bitfields are always classified as integer. Handle them
3113 early, since later code would consider them to be
3114 misaligned integers. */
3115 if (DECL_BIT_FIELD (field))
3117 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3118 i < ((int_bit_position (field) + (bit_offset % 64))
3119 + tree_low_cst (DECL_SIZE (field), 0)
3122 merge_classes (X86_64_INTEGER_CLASS,
3127 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3128 TREE_TYPE (field), subclasses,
3129 (int_bit_position (field)
3130 + bit_offset) % 256);
3133 for (i = 0; i < num; i++)
3136 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3138 merge_classes (subclasses[i], classes[i + pos]);
3146 /* Arrays are handled as small records. */
3149 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3150 TREE_TYPE (type), subclasses, bit_offset);
3154 /* The partial classes are now full classes. */
3155 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3156 subclasses[0] = X86_64_SSE_CLASS;
3157 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3158 subclasses[0] = X86_64_INTEGER_CLASS;
3160 for (i = 0; i < words; i++)
3161 classes[i] = subclasses[i % num];
3166 case QUAL_UNION_TYPE:
3167 /* Unions are similar to RECORD_TYPE but offset is always 0.
3169 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3171 if (TREE_CODE (field) == FIELD_DECL)
3175 if (TREE_TYPE (field) == error_mark_node)
3178 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3179 TREE_TYPE (field), subclasses,
3183 for (i = 0; i < num; i++)
3184 classes[i] = merge_classes (subclasses[i], classes[i]);
3193 /* Final merger cleanup. */
3194 for (i = 0; i < words; i++)
3196 /* If one class is MEMORY, everything should be passed in
3198 if (classes[i] == X86_64_MEMORY_CLASS)
3201 /* The X86_64_SSEUP_CLASS should be always preceded by
3202 X86_64_SSE_CLASS. */
3203 if (classes[i] == X86_64_SSEUP_CLASS
3204 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3205 classes[i] = X86_64_SSE_CLASS;
3207 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3208 if (classes[i] == X86_64_X87UP_CLASS
3209 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3210 classes[i] = X86_64_SSE_CLASS;
3215 /* Compute alignment needed. We align all types to natural boundaries with
3216 exception of XFmode that is aligned to 64bits. */
3217 if (mode != VOIDmode && mode != BLKmode)
3219 int mode_alignment = GET_MODE_BITSIZE (mode);
3222 mode_alignment = 128;
3223 else if (mode == XCmode)
3224 mode_alignment = 256;
3225 if (COMPLEX_MODE_P (mode))
3226 mode_alignment /= 2;
3227 /* Misaligned fields are always returned in memory. */
3228 if (bit_offset % mode_alignment)
3232 /* for V1xx modes, just use the base mode */
3233 if (VECTOR_MODE_P (mode)
3234 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3235 mode = GET_MODE_INNER (mode);
3237 /* Classification of atomic types. */
3242 classes[0] = X86_64_SSE_CLASS;
3245 classes[0] = X86_64_SSE_CLASS;
3246 classes[1] = X86_64_SSEUP_CLASS;
3255 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3256 classes[0] = X86_64_INTEGERSI_CLASS;
3258 classes[0] = X86_64_INTEGER_CLASS;
3262 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3267 if (!(bit_offset % 64))
3268 classes[0] = X86_64_SSESF_CLASS;
3270 classes[0] = X86_64_SSE_CLASS;
3273 classes[0] = X86_64_SSEDF_CLASS;
3276 classes[0] = X86_64_X87_CLASS;
3277 classes[1] = X86_64_X87UP_CLASS;
3280 classes[0] = X86_64_SSE_CLASS;
3281 classes[1] = X86_64_SSEUP_CLASS;
3284 classes[0] = X86_64_SSE_CLASS;
3287 classes[0] = X86_64_SSEDF_CLASS;
3288 classes[1] = X86_64_SSEDF_CLASS;
3291 classes[0] = X86_64_COMPLEX_X87_CLASS;
3294 /* This modes is larger than 16 bytes. */
3302 classes[0] = X86_64_SSE_CLASS;
3303 classes[1] = X86_64_SSEUP_CLASS;
3309 classes[0] = X86_64_SSE_CLASS;
3315 gcc_assert (VECTOR_MODE_P (mode));
3320 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3322 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3323 classes[0] = X86_64_INTEGERSI_CLASS;
3325 classes[0] = X86_64_INTEGER_CLASS;
3326 classes[1] = X86_64_INTEGER_CLASS;
3327 return 1 + (bytes > 8);
3331 /* Examine the argument and return set number of register required in each
3332 class. Return 0 iff parameter should be passed in memory. */
3334 examine_argument (enum machine_mode mode, tree type, int in_return,
3335 int *int_nregs, int *sse_nregs)
3337 enum x86_64_reg_class class[MAX_CLASSES];
3338 int n = classify_argument (mode, type, class, 0);
3344 for (n--; n >= 0; n--)
3347 case X86_64_INTEGER_CLASS:
3348 case X86_64_INTEGERSI_CLASS:
3351 case X86_64_SSE_CLASS:
3352 case X86_64_SSESF_CLASS:
3353 case X86_64_SSEDF_CLASS:
3356 case X86_64_NO_CLASS:
3357 case X86_64_SSEUP_CLASS:
3359 case X86_64_X87_CLASS:
3360 case X86_64_X87UP_CLASS:
3364 case X86_64_COMPLEX_X87_CLASS:
3365 return in_return ? 2 : 0;
3366 case X86_64_MEMORY_CLASS:
3372 /* Construct container for the argument used by GCC interface. See
3373 FUNCTION_ARG for the detailed description. */
3376 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3377 tree type, int in_return, int nintregs, int nsseregs,
3378 const int *intreg, int sse_regno)
3380 /* The following variables hold the static issued_error state. */
3381 static bool issued_sse_arg_error;
3382 static bool issued_sse_ret_error;
3383 static bool issued_x87_ret_error;
3385 enum machine_mode tmpmode;
3387 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3388 enum x86_64_reg_class class[MAX_CLASSES];
3392 int needed_sseregs, needed_intregs;
3393 rtx exp[MAX_CLASSES];
3396 n = classify_argument (mode, type, class, 0);
3397 if (TARGET_DEBUG_ARG)
3400 fprintf (stderr, "Memory class\n");
3403 fprintf (stderr, "Classes:");
3404 for (i = 0; i < n; i++)
3406 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3408 fprintf (stderr, "\n");
3413 if (!examine_argument (mode, type, in_return, &needed_intregs,
3416 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3419 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3420 some less clueful developer tries to use floating-point anyway. */
3421 if (needed_sseregs && !TARGET_SSE)
3425 if (!issued_sse_ret_error)
3427 error ("SSE register return with SSE disabled");
3428 issued_sse_ret_error = true;
3431 else if (!issued_sse_arg_error)
3433 error ("SSE register argument with SSE disabled");
3434 issued_sse_arg_error = true;
3439 /* Likewise, error if the ABI requires us to return values in the
3440 x87 registers and the user specified -mno-80387. */
3441 if (!TARGET_80387 && in_return)
3442 for (i = 0; i < n; i++)
3443 if (class[i] == X86_64_X87_CLASS
3444 || class[i] == X86_64_X87UP_CLASS
3445 || class[i] == X86_64_COMPLEX_X87_CLASS)
3447 if (!issued_x87_ret_error)
3449 error ("x87 register return with x87 disabled");
3450 issued_x87_ret_error = true;
3455 /* First construct simple cases. Avoid SCmode, since we want to use
3456 single register to pass this type. */
3457 if (n == 1 && mode != SCmode)
3460 case X86_64_INTEGER_CLASS:
3461 case X86_64_INTEGERSI_CLASS:
3462 return gen_rtx_REG (mode, intreg[0]);
3463 case X86_64_SSE_CLASS:
3464 case X86_64_SSESF_CLASS:
3465 case X86_64_SSEDF_CLASS:
3466 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3467 case X86_64_X87_CLASS:
3468 case X86_64_COMPLEX_X87_CLASS:
3469 return gen_rtx_REG (mode, FIRST_STACK_REG);
3470 case X86_64_NO_CLASS:
3471 /* Zero sized array, struct or class. */
3476 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3478 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3480 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3481 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3482 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3483 && class[1] == X86_64_INTEGER_CLASS
3484 && (mode == CDImode || mode == TImode || mode == TFmode)
3485 && intreg[0] + 1 == intreg[1])
3486 return gen_rtx_REG (mode, intreg[0]);
3488 /* Otherwise figure out the entries of the PARALLEL. */
3489 for (i = 0; i < n; i++)
3493 case X86_64_NO_CLASS:
3495 case X86_64_INTEGER_CLASS:
3496 case X86_64_INTEGERSI_CLASS:
3497 /* Merge TImodes on aligned occasions here too. */
3498 if (i * 8 + 8 > bytes)
3499 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3500 else if (class[i] == X86_64_INTEGERSI_CLASS)
3504 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3505 if (tmpmode == BLKmode)
3507 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3508 gen_rtx_REG (tmpmode, *intreg),
3512 case X86_64_SSESF_CLASS:
3513 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3514 gen_rtx_REG (SFmode,
3515 SSE_REGNO (sse_regno)),
3519 case X86_64_SSEDF_CLASS:
3520 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3521 gen_rtx_REG (DFmode,
3522 SSE_REGNO (sse_regno)),
3526 case X86_64_SSE_CLASS:
3527 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3531 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3532 gen_rtx_REG (tmpmode,
3533 SSE_REGNO (sse_regno)),
3535 if (tmpmode == TImode)
3544 /* Empty aligned struct, union or class. */
3548 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3549 for (i = 0; i < nexps; i++)
3550 XVECEXP (ret, 0, i) = exp [i];
3554 /* Update the data in CUM to advance over an argument
3555 of mode MODE and data type TYPE.
3556 (TYPE is null for libcalls where that information may not be available.) */
3559 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3560 tree type, int named)
3563 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3564 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3567 mode = type_natural_mode (type);
3569 if (TARGET_DEBUG_ARG)
3570 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3571 "mode=%s, named=%d)\n\n",
3572 words, cum->words, cum->nregs, cum->sse_nregs,
3573 GET_MODE_NAME (mode), named);
3577 int int_nregs, sse_nregs;
3578 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3579 cum->words += words;
3580 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3582 cum->nregs -= int_nregs;
3583 cum->sse_nregs -= sse_nregs;
3584 cum->regno += int_nregs;
3585 cum->sse_regno += sse_nregs;
3588 cum->words += words;
3606 cum->words += words;
3607 cum->nregs -= words;
3608 cum->regno += words;
3610 if (cum->nregs <= 0)
3618 if (cum->float_in_sse > 0)
3622 if (cum->float_in_sse > 1)
3625 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3626 rounding takes place when values are passed in x87
3627 registers, pass DFmode and SFmode types to local functions
3628 only when flag_unsafe_math_optimizations is set. */
3629 if (!cum->float_in_x87
3630 || (cum->float_in_x87 == 2
3631 && !flag_unsafe_math_optimizations))
3635 if (!cum->float_in_x87)
3638 if (!type || !AGGREGATE_TYPE_P (type))
3640 cum->x87_nregs -= 1;
3641 cum->x87_regno += 1;
3642 if (cum->x87_nregs <= 0)
3659 if (!type || !AGGREGATE_TYPE_P (type))
3661 cum->sse_nregs -= 1;
3662 cum->sse_regno += 1;
3663 if (cum->sse_nregs <= 0)
3675 if (!type || !AGGREGATE_TYPE_P (type))
3677 cum->mmx_nregs -= 1;
3678 cum->mmx_regno += 1;
3679 if (cum->mmx_nregs <= 0)
3690 /* Define where to put the arguments to a function.
3691 Value is zero to push the argument on the stack,
3692 or a hard register in which to store the argument.
3694 MODE is the argument's machine mode.
3695 TYPE is the data type of the argument (as a tree).
3696 This is null for libcalls where that information may
3698 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3699 the preceding args and about the function being called.
3700 NAMED is nonzero if this argument is a named parameter
3701 (otherwise it is an extra parameter matching an ellipsis). */
3704 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3705 tree type, int named)
3707 enum machine_mode mode = orig_mode;
3710 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3711 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3712 static bool warnedsse, warnedmmx;
3714 /* To simplify the code below, represent vector types with a vector mode
3715 even if MMX/SSE are not active. */
3716 if (type && TREE_CODE (type) == VECTOR_TYPE)
3717 mode = type_natural_mode (type);
3719 /* Handle a hidden AL argument containing number of registers for varargs
3720 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3722 if (mode == VOIDmode)
3725 return GEN_INT (cum->maybe_vaarg
3726 ? (cum->sse_nregs < 0
3734 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3736 &x86_64_int_parameter_registers [cum->regno],
3752 if (words <= cum->nregs)
3754 int regno = cum->regno;
3756 /* Fastcall allocates the first two DWORD (SImode) or
3757 smaller arguments to ECX and EDX. */
3760 if (mode == BLKmode || mode == DImode)
3763 /* ECX not EAX is the first allocated register. */
3767 ret = gen_rtx_REG (mode, regno);
3772 if (cum->float_in_sse > 0)
3776 if (cum->float_in_sse > 1)
3779 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3780 rounding takes place when values are passed in x87
3781 registers, pass DFmode and SFmode types to local functions
3782 only when flag_unsafe_math_optimizations is set. */
3783 if (!cum->float_in_x87
3784 || (cum->float_in_x87 == 2
3785 && !flag_unsafe_math_optimizations))
3789 if (!cum->float_in_x87)
3792 if (!type || !AGGREGATE_TYPE_P (type))
3794 ret = gen_rtx_REG (mode, cum->x87_regno + FIRST_FLOAT_REG);
3806 if (!type || !AGGREGATE_TYPE_P (type))
3808 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3811 warning (0, "SSE vector argument without SSE enabled "
3815 ret = gen_reg_or_parallel (mode, orig_mode,
3816 cum->sse_regno + FIRST_SSE_REG);
3823 if (!type || !AGGREGATE_TYPE_P (type))
3825 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3828 warning (0, "MMX vector argument without MMX enabled "
3832 ret = gen_reg_or_parallel (mode, orig_mode,
3833 cum->mmx_regno + FIRST_MMX_REG);
3838 if (TARGET_DEBUG_ARG)
3841 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3842 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3845 print_simple_rtl (stderr, ret);
3847 fprintf (stderr, ", stack");
3849 fprintf (stderr, " )\n");
3855 /* A C expression that indicates when an argument must be passed by
3856 reference. If nonzero for an argument, a copy of that argument is
3857 made in memory and a pointer to the argument is passed instead of
3858 the argument itself. The pointer is passed in whatever way is
3859 appropriate for passing a pointer to that type. */
3862 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3863 enum machine_mode mode ATTRIBUTE_UNUSED,
3864 tree type, bool named ATTRIBUTE_UNUSED)
3869 if (type && int_size_in_bytes (type) == -1)
3871 if (TARGET_DEBUG_ARG)
3872 fprintf (stderr, "function_arg_pass_by_reference\n");
3879 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3880 ABI. Only called if TARGET_SSE. */
3882 contains_128bit_aligned_vector_p (tree type)
3884 enum machine_mode mode = TYPE_MODE (type);
3885 if (SSE_REG_MODE_P (mode)
3886 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3888 if (TYPE_ALIGN (type) < 128)
3891 if (AGGREGATE_TYPE_P (type))
3893 /* Walk the aggregates recursively. */
3894 switch (TREE_CODE (type))
3898 case QUAL_UNION_TYPE:
3902 /* Walk all the structure fields. */
3903 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3905 if (TREE_CODE (field) == FIELD_DECL
3906 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3913 /* Just for use if some languages passes arrays by value. */
3914 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3925 /* Gives the alignment boundary, in bits, of an argument with the
3926 specified mode and type. */
3929 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3933 align = TYPE_ALIGN (type);
3935 align = GET_MODE_ALIGNMENT (mode);
3936 if (align < PARM_BOUNDARY)
3937 align = PARM_BOUNDARY;
3940 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3941 make an exception for SSE modes since these require 128bit
3944 The handling here differs from field_alignment. ICC aligns MMX
3945 arguments to 4 byte boundaries, while structure fields are aligned
3946 to 8 byte boundaries. */
3948 align = PARM_BOUNDARY;
3951 if (!SSE_REG_MODE_P (mode))
3952 align = PARM_BOUNDARY;
3956 if (!contains_128bit_aligned_vector_p (type))
3957 align = PARM_BOUNDARY;
3965 /* Return true if N is a possible register number of function value. */
3967 ix86_function_value_regno_p (int regno)
3970 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3971 || (regno == FIRST_SSE_REG && TARGET_SSE))
3975 && (regno == FIRST_MMX_REG && TARGET_MMX))
3981 /* Define how to find the value returned by a function.
3982 VALTYPE is the data type of the value (as a tree).
3983 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3984 otherwise, FUNC is 0. */
3986 ix86_function_value (tree valtype, tree fntype_or_decl,
3987 bool outgoing ATTRIBUTE_UNUSED)
3989 enum machine_mode natmode = type_natural_mode (valtype);
3993 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3994 1, REGPARM_MAX, SSE_REGPARM_MAX,
3995 x86_64_int_return_registers, 0);
3996 /* For zero sized structures, construct_container return NULL, but we
3997 need to keep rest of compiler happy by returning meaningful value. */
3999 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4004 tree fn = NULL_TREE, fntype;
4006 && DECL_P (fntype_or_decl))
4007 fn = fntype_or_decl;
4008 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4009 return gen_rtx_REG (TYPE_MODE (valtype),
4010 ix86_value_regno (natmode, fn, fntype));
4014 /* Return true iff type is returned in memory. */
4016 ix86_return_in_memory (tree type)
4018 int needed_intregs, needed_sseregs, size;
4019 enum machine_mode mode = type_natural_mode (type);
4022 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4024 if (mode == BLKmode)
4027 size = int_size_in_bytes (type);
4029 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4032 if (VECTOR_MODE_P (mode) || mode == TImode)
4034 /* User-created vectors small enough to fit in EAX. */
4038 /* MMX/3dNow values are returned in MM0,
4039 except when it doesn't exits. */
4041 return (TARGET_MMX ? 0 : 1);
4043 /* SSE values are returned in XMM0, except when it doesn't exist. */
4045 return (TARGET_SSE ? 0 : 1);
4059 /* When returning SSE vector types, we have a choice of either
4060 (1) being abi incompatible with a -march switch, or
4061 (2) generating an error.
4062 Given no good solution, I think the safest thing is one warning.
4063 The user won't be able to use -Werror, but....
4065 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4066 called in response to actually generating a caller or callee that
4067 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4068 via aggregate_value_p for general type probing from tree-ssa. */
4071 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4073 static bool warnedsse, warnedmmx;
4077 /* Look at the return type of the function, not the function type. */
4078 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4080 if (!TARGET_SSE && !warnedsse)
4083 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4086 warning (0, "SSE vector return without SSE enabled "
4091 if (!TARGET_MMX && !warnedmmx)
4093 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4096 warning (0, "MMX vector return without MMX enabled "
4105 /* Define how to find the value returned by a library function
4106 assuming the value has mode MODE. */
4108 ix86_libcall_value (enum machine_mode mode)
4122 return gen_rtx_REG (mode, FIRST_SSE_REG);
4125 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4129 return gen_rtx_REG (mode, 0);
4133 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4136 /* Given a mode, return the register to use for a return value. */
4139 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4141 gcc_assert (!TARGET_64BIT);
4143 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4144 we normally prevent this case when mmx is not available. However
4145 some ABIs may require the result to be returned like DImode. */
4146 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4147 return TARGET_MMX ? FIRST_MMX_REG : 0;
4149 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4150 we prevent this case when sse is not available. However some ABIs
4151 may require the result to be returned like integer TImode. */
4152 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4153 return TARGET_SSE ? FIRST_SSE_REG : 0;
4155 /* Decimal floating point values can go in %eax, unlike other float modes. */
4156 if (DECIMAL_FLOAT_MODE_P (mode))
4159 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4160 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4163 /* Floating point return values in %st(0), except for local functions when
4164 SSE math is enabled or for functions with sseregparm attribute. */
4165 if ((func || fntype)
4166 && (mode == SFmode || mode == DFmode))
4168 int sse_level = ix86_function_sseregparm (fntype, func);
4169 if ((sse_level >= 1 && mode == SFmode)
4170 || (sse_level == 2 && mode == DFmode))
4171 return FIRST_SSE_REG;
4174 return FIRST_FLOAT_REG;
4177 /* Create the va_list data type. */
4180 ix86_build_builtin_va_list (void)
4182 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4184 /* For i386 we use plain pointer to argument area. */
4186 return build_pointer_type (char_type_node);
4188 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4189 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4191 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4192 unsigned_type_node);
4193 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4194 unsigned_type_node);
4195 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4197 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4200 va_list_gpr_counter_field = f_gpr;
4201 va_list_fpr_counter_field = f_fpr;
4203 DECL_FIELD_CONTEXT (f_gpr) = record;
4204 DECL_FIELD_CONTEXT (f_fpr) = record;
4205 DECL_FIELD_CONTEXT (f_ovf) = record;
4206 DECL_FIELD_CONTEXT (f_sav) = record;
4208 TREE_CHAIN (record) = type_decl;
4209 TYPE_NAME (record) = type_decl;
4210 TYPE_FIELDS (record) = f_gpr;
4211 TREE_CHAIN (f_gpr) = f_fpr;
4212 TREE_CHAIN (f_fpr) = f_ovf;
4213 TREE_CHAIN (f_ovf) = f_sav;
4215 layout_type (record);
4217 /* The correct type is an array type of one element. */
4218 return build_array_type (record, build_index_type (size_zero_node));
4221 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4224 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4225 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4228 CUMULATIVE_ARGS next_cum;
4229 rtx save_area = NULL_RTX, mem;
4242 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4245 /* Indicate to allocate space on the stack for varargs save area. */
4246 ix86_save_varrargs_registers = 1;
4248 cfun->stack_alignment_needed = 128;
4250 fntype = TREE_TYPE (current_function_decl);
4251 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4252 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4253 != void_type_node));
4255 /* For varargs, we do not want to skip the dummy va_dcl argument.
4256 For stdargs, we do want to skip the last named argument. */
4259 function_arg_advance (&next_cum, mode, type, 1);
4262 save_area = frame_pointer_rtx;
4264 set = get_varargs_alias_set ();
4266 for (i = next_cum.regno;
4268 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4271 mem = gen_rtx_MEM (Pmode,
4272 plus_constant (save_area, i * UNITS_PER_WORD));
4273 MEM_NOTRAP_P (mem) = 1;
4274 set_mem_alias_set (mem, set);
4275 emit_move_insn (mem, gen_rtx_REG (Pmode,
4276 x86_64_int_parameter_registers[i]));
4279 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4281 /* Now emit code to save SSE registers. The AX parameter contains number
4282 of SSE parameter registers used to call this function. We use
4283 sse_prologue_save insn template that produces computed jump across
4284 SSE saves. We need some preparation work to get this working. */
4286 label = gen_label_rtx ();
4287 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4289 /* Compute address to jump to :
4290 label - 5*eax + nnamed_sse_arguments*5 */
4291 tmp_reg = gen_reg_rtx (Pmode);
4292 nsse_reg = gen_reg_rtx (Pmode);
4293 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4294 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4295 gen_rtx_MULT (Pmode, nsse_reg,
4297 if (next_cum.sse_regno)
4300 gen_rtx_CONST (DImode,
4301 gen_rtx_PLUS (DImode,
4303 GEN_INT (next_cum.sse_regno * 4))));
4305 emit_move_insn (nsse_reg, label_ref);
4306 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4308 /* Compute address of memory block we save into. We always use pointer
4309 pointing 127 bytes after first byte to store - this is needed to keep
4310 instruction size limited by 4 bytes. */
4311 tmp_reg = gen_reg_rtx (Pmode);
4312 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4313 plus_constant (save_area,
4314 8 * REGPARM_MAX + 127)));
4315 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4316 MEM_NOTRAP_P (mem) = 1;
4317 set_mem_alias_set (mem, set);
4318 set_mem_align (mem, BITS_PER_WORD);
4320 /* And finally do the dirty job! */
4321 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4322 GEN_INT (next_cum.sse_regno), label));
4327 /* Implement va_start. */
4330 ix86_va_start (tree valist, rtx nextarg)
4332 HOST_WIDE_INT words, n_gpr, n_fpr;
4333 tree f_gpr, f_fpr, f_ovf, f_sav;
4334 tree gpr, fpr, ovf, sav, t;
4337 /* Only 64bit target needs something special. */
4340 std_expand_builtin_va_start (valist, nextarg);
4344 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4345 f_fpr = TREE_CHAIN (f_gpr);
4346 f_ovf = TREE_CHAIN (f_fpr);
4347 f_sav = TREE_CHAIN (f_ovf);
4349 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4350 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4351 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4352 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4353 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4355 /* Count number of gp and fp argument registers used. */
4356 words = current_function_args_info.words;
4357 n_gpr = current_function_args_info.regno;
4358 n_fpr = current_function_args_info.sse_regno;
4360 if (TARGET_DEBUG_ARG)
4361 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4362 (int) words, (int) n_gpr, (int) n_fpr);
4364 if (cfun->va_list_gpr_size)
4366 type = TREE_TYPE (gpr);
4367 t = build2 (MODIFY_EXPR, type, gpr,
4368 build_int_cst (type, n_gpr * 8));
4369 TREE_SIDE_EFFECTS (t) = 1;
4370 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4373 if (cfun->va_list_fpr_size)
4375 type = TREE_TYPE (fpr);
4376 t = build2 (MODIFY_EXPR, type, fpr,
4377 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4378 TREE_SIDE_EFFECTS (t) = 1;
4379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4382 /* Find the overflow area. */
4383 type = TREE_TYPE (ovf);
4384 t = make_tree (type, virtual_incoming_args_rtx);
4386 t = build2 (PLUS_EXPR, type, t,
4387 build_int_cst (type, words * UNITS_PER_WORD));
4388 t = build2 (MODIFY_EXPR, type, ovf, t);
4389 TREE_SIDE_EFFECTS (t) = 1;
4390 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4392 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4394 /* Find the register save area.
4395 Prologue of the function save it right above stack frame. */
4396 type = TREE_TYPE (sav);
4397 t = make_tree (type, frame_pointer_rtx);
4398 t = build2 (MODIFY_EXPR, type, sav, t);
4399 TREE_SIDE_EFFECTS (t) = 1;
4400 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4404 /* Implement va_arg. */
4407 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4409 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4410 tree f_gpr, f_fpr, f_ovf, f_sav;
4411 tree gpr, fpr, ovf, sav, t;
4413 tree lab_false, lab_over = NULL_TREE;
4418 enum machine_mode nat_mode;
4420 /* Only 64bit target needs something special. */
4422 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4424 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4425 f_fpr = TREE_CHAIN (f_gpr);
4426 f_ovf = TREE_CHAIN (f_fpr);
4427 f_sav = TREE_CHAIN (f_ovf);
4429 valist = build_va_arg_indirect_ref (valist);
4430 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4431 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4432 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4433 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4435 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4437 type = build_pointer_type (type);
4438 size = int_size_in_bytes (type);
4439 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4441 nat_mode = type_natural_mode (type);
4442 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4443 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4445 /* Pull the value out of the saved registers. */
4447 addr = create_tmp_var (ptr_type_node, "addr");
4448 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4452 int needed_intregs, needed_sseregs;
4454 tree int_addr, sse_addr;
4456 lab_false = create_artificial_label ();
4457 lab_over = create_artificial_label ();
4459 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4461 need_temp = (!REG_P (container)
4462 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4463 || TYPE_ALIGN (type) > 128));
4465 /* In case we are passing structure, verify that it is consecutive block
4466 on the register save area. If not we need to do moves. */
4467 if (!need_temp && !REG_P (container))
4469 /* Verify that all registers are strictly consecutive */
4470 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4474 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4476 rtx slot = XVECEXP (container, 0, i);
4477 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4478 || INTVAL (XEXP (slot, 1)) != i * 16)
4486 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4488 rtx slot = XVECEXP (container, 0, i);
4489 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4490 || INTVAL (XEXP (slot, 1)) != i * 8)
4502 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4503 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4504 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4505 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4508 /* First ensure that we fit completely in registers. */
4511 t = build_int_cst (TREE_TYPE (gpr),
4512 (REGPARM_MAX - needed_intregs + 1) * 8);
4513 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4514 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4515 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4516 gimplify_and_add (t, pre_p);
4520 t = build_int_cst (TREE_TYPE (fpr),
4521 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4523 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4524 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4525 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4526 gimplify_and_add (t, pre_p);
4529 /* Compute index to start of area used for integer regs. */
4532 /* int_addr = gpr + sav; */
4533 t = fold_convert (ptr_type_node, gpr);
4534 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4535 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4536 gimplify_and_add (t, pre_p);
4540 /* sse_addr = fpr + sav; */
4541 t = fold_convert (ptr_type_node, fpr);
4542 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4543 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4544 gimplify_and_add (t, pre_p);
4549 tree temp = create_tmp_var (type, "va_arg_tmp");
4552 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4553 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4554 gimplify_and_add (t, pre_p);
4556 for (i = 0; i < XVECLEN (container, 0); i++)
4558 rtx slot = XVECEXP (container, 0, i);
4559 rtx reg = XEXP (slot, 0);
4560 enum machine_mode mode = GET_MODE (reg);
4561 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4562 tree addr_type = build_pointer_type (piece_type);
4565 tree dest_addr, dest;
4567 if (SSE_REGNO_P (REGNO (reg)))
4569 src_addr = sse_addr;
4570 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4574 src_addr = int_addr;
4575 src_offset = REGNO (reg) * 8;
4577 src_addr = fold_convert (addr_type, src_addr);
4578 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4579 size_int (src_offset)));
4580 src = build_va_arg_indirect_ref (src_addr);
4582 dest_addr = fold_convert (addr_type, addr);
4583 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4584 size_int (INTVAL (XEXP (slot, 1)))));
4585 dest = build_va_arg_indirect_ref (dest_addr);
4587 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4588 gimplify_and_add (t, pre_p);
4594 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4595 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4596 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4597 gimplify_and_add (t, pre_p);
4601 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4602 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4603 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4604 gimplify_and_add (t, pre_p);
4607 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4608 gimplify_and_add (t, pre_p);
4610 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4611 append_to_statement_list (t, pre_p);
4614 /* ... otherwise out of the overflow area. */
4616 /* Care for on-stack alignment if needed. */
4617 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4618 || integer_zerop (TYPE_SIZE (type)))
4622 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4623 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4624 build_int_cst (TREE_TYPE (ovf), align - 1));
4625 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4626 build_int_cst (TREE_TYPE (t), -align));
4628 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4630 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4631 gimplify_and_add (t2, pre_p);
4633 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4634 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4635 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4636 gimplify_and_add (t, pre_p);
4640 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4641 append_to_statement_list (t, pre_p);
4644 ptrtype = build_pointer_type (type);
4645 addr = fold_convert (ptrtype, addr);
4648 addr = build_va_arg_indirect_ref (addr);
4649 return build_va_arg_indirect_ref (addr);
4652 /* Return nonzero if OPNUM's MEM should be matched
4653 in movabs* patterns. */
4656 ix86_check_movabs (rtx insn, int opnum)
4660 set = PATTERN (insn);
4661 if (GET_CODE (set) == PARALLEL)
4662 set = XVECEXP (set, 0, 0);
4663 gcc_assert (GET_CODE (set) == SET);
4664 mem = XEXP (set, opnum);
4665 while (GET_CODE (mem) == SUBREG)
4666 mem = SUBREG_REG (mem);
4667 gcc_assert (GET_CODE (mem) == MEM);
4668 return (volatile_ok || !MEM_VOLATILE_P (mem));
4671 /* Initialize the table of extra 80387 mathematical constants. */
4674 init_ext_80387_constants (void)
4676 static const char * cst[5] =
4678 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4679 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4680 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4681 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4682 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4686 for (i = 0; i < 5; i++)
4688 real_from_string (&ext_80387_constants_table[i], cst[i]);
4689 /* Ensure each constant is rounded to XFmode precision. */
4690 real_convert (&ext_80387_constants_table[i],
4691 XFmode, &ext_80387_constants_table[i]);
4694 ext_80387_constants_init = 1;
4697 /* Return true if the constant is something that can be loaded with
4698 a special instruction. */
4701 standard_80387_constant_p (rtx x)
4705 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4708 if (x == CONST0_RTX (GET_MODE (x)))
4710 if (x == CONST1_RTX (GET_MODE (x)))
4713 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4715 /* For XFmode constants, try to find a special 80387 instruction when
4716 optimizing for size or on those CPUs that benefit from them. */
4717 if (GET_MODE (x) == XFmode
4718 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4722 if (! ext_80387_constants_init)
4723 init_ext_80387_constants ();
4725 for (i = 0; i < 5; i++)
4726 if (real_identical (&r, &ext_80387_constants_table[i]))
4730 /* Load of the constant -0.0 or -1.0 will be split as
4731 fldz;fchs or fld1;fchs sequence. */
4732 if (real_isnegzero (&r))
4734 if (real_identical (&r, &dconstm1))
4740 /* Return the opcode of the special instruction to be used to load
4744 standard_80387_constant_opcode (rtx x)
4746 switch (standard_80387_constant_p (x))
4770 /* Return the CONST_DOUBLE representing the 80387 constant that is
4771 loaded by the specified special instruction. The argument IDX
4772 matches the return value from standard_80387_constant_p. */
4775 standard_80387_constant_rtx (int idx)
4779 if (! ext_80387_constants_init)
4780 init_ext_80387_constants ();
4796 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4800 /* Return 1 if mode is a valid mode for sse. */
4802 standard_sse_mode_p (enum machine_mode mode)
4819 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4822 standard_sse_constant_p (rtx x)
4824 enum machine_mode mode = GET_MODE (x);
4826 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4828 if (vector_all_ones_operand (x, mode)
4829 && standard_sse_mode_p (mode))
4830 return TARGET_SSE2 ? 2 : -1;
4835 /* Return the opcode of the special instruction to be used to load
4839 standard_sse_constant_opcode (rtx insn, rtx x)
4841 switch (standard_sse_constant_p (x))
4844 if (get_attr_mode (insn) == MODE_V4SF)
4845 return "xorps\t%0, %0";
4846 else if (get_attr_mode (insn) == MODE_V2DF)
4847 return "xorpd\t%0, %0";
4849 return "pxor\t%0, %0";
4851 return "pcmpeqd\t%0, %0";
4856 /* Returns 1 if OP contains a symbol reference */
4859 symbolic_reference_mentioned_p (rtx op)
4864 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4867 fmt = GET_RTX_FORMAT (GET_CODE (op));
4868 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4874 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4875 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4879 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4886 /* Return 1 if it is appropriate to emit `ret' instructions in the
4887 body of a function. Do this only if the epilogue is simple, needing a
4888 couple of insns. Prior to reloading, we can't tell how many registers
4889 must be saved, so return 0 then. Return 0 if there is no frame
4890 marker to de-allocate. */
4893 ix86_can_use_return_insn_p (void)
4895 struct ix86_frame frame;
4897 if (! reload_completed || frame_pointer_needed)
4900 /* Don't allow more than 32 pop, since that's all we can do
4901 with one instruction. */
4902 if (current_function_pops_args
4903 && current_function_args_size >= 32768)
4906 ix86_compute_frame_layout (&frame);
4907 return frame.to_allocate == 0 && frame.nregs == 0;
4910 /* Value should be nonzero if functions must have frame pointers.
4911 Zero means the frame pointer need not be set up (and parms may
4912 be accessed via the stack pointer) in functions that seem suitable. */
4915 ix86_frame_pointer_required (void)
4917 /* If we accessed previous frames, then the generated code expects
4918 to be able to access the saved ebp value in our frame. */
4919 if (cfun->machine->accesses_prev_frame)
4922 /* Several x86 os'es need a frame pointer for other reasons,
4923 usually pertaining to setjmp. */
4924 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4927 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4928 the frame pointer by default. Turn it back on now if we've not
4929 got a leaf function. */
4930 if (TARGET_OMIT_LEAF_FRAME_POINTER
4931 && (!current_function_is_leaf
4932 || ix86_current_function_calls_tls_descriptor))
4935 if (current_function_profile)
4941 /* Record that the current function accesses previous call frames. */
4944 ix86_setup_frame_addresses (void)
4946 cfun->machine->accesses_prev_frame = 1;
4949 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4950 # define USE_HIDDEN_LINKONCE 1
4952 # define USE_HIDDEN_LINKONCE 0
4955 static int pic_labels_used;
4957 /* Fills in the label name that should be used for a pc thunk for
4958 the given register. */
4961 get_pc_thunk_name (char name[32], unsigned int regno)
4963 gcc_assert (!TARGET_64BIT);
4965 if (USE_HIDDEN_LINKONCE)
4966 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4968 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4972 /* This function generates code for -fpic that loads %ebx with
4973 the return address of the caller and then returns. */
4976 ix86_file_end (void)
4981 for (regno = 0; regno < 8; ++regno)
4985 if (! ((pic_labels_used >> regno) & 1))
4988 get_pc_thunk_name (name, regno);
4993 switch_to_section (darwin_sections[text_coal_section]);
4994 fputs ("\t.weak_definition\t", asm_out_file);
4995 assemble_name (asm_out_file, name);
4996 fputs ("\n\t.private_extern\t", asm_out_file);
4997 assemble_name (asm_out_file, name);
4998 fputs ("\n", asm_out_file);
4999 ASM_OUTPUT_LABEL (asm_out_file, name);
5003 if (USE_HIDDEN_LINKONCE)
5007 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5009 TREE_PUBLIC (decl) = 1;
5010 TREE_STATIC (decl) = 1;
5011 DECL_ONE_ONLY (decl) = 1;
5013 (*targetm.asm_out.unique_section) (decl, 0);
5014 switch_to_section (get_named_section (decl, NULL, 0));
5016 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5017 fputs ("\t.hidden\t", asm_out_file);
5018 assemble_name (asm_out_file, name);
5019 fputc ('\n', asm_out_file);
5020 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5024 switch_to_section (text_section);
5025 ASM_OUTPUT_LABEL (asm_out_file, name);
5028 xops[0] = gen_rtx_REG (SImode, regno);
5029 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5030 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5031 output_asm_insn ("ret", xops);
5034 if (NEED_INDICATE_EXEC_STACK)
5035 file_end_indicate_exec_stack ();
5038 /* Emit code for the SET_GOT patterns. */
5041 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5046 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5048 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5050 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5053 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5055 output_asm_insn ("call\t%a2", xops);
5058 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5059 is what will be referenced by the Mach-O PIC subsystem. */
5061 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5064 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5065 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5068 output_asm_insn ("pop{l}\t%0", xops);
5073 get_pc_thunk_name (name, REGNO (dest));
5074 pic_labels_used |= 1 << REGNO (dest);
5076 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5077 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5078 output_asm_insn ("call\t%X2", xops);
5079 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5080 is what will be referenced by the Mach-O PIC subsystem. */
5083 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5085 targetm.asm_out.internal_label (asm_out_file, "L",
5086 CODE_LABEL_NUMBER (label));
5093 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5094 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5096 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5101 /* Generate an "push" pattern for input ARG. */
5106 return gen_rtx_SET (VOIDmode,
5108 gen_rtx_PRE_DEC (Pmode,
5109 stack_pointer_rtx)),
5113 /* Return >= 0 if there is an unused call-clobbered register available
5114 for the entire function. */
5117 ix86_select_alt_pic_regnum (void)
5119 if (current_function_is_leaf && !current_function_profile
5120 && !ix86_current_function_calls_tls_descriptor)
5123 for (i = 2; i >= 0; --i)
5124 if (!regs_ever_live[i])
5128 return INVALID_REGNUM;
5131 /* Return 1 if we need to save REGNO. */
5133 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5135 if (pic_offset_table_rtx
5136 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5137 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5138 || current_function_profile
5139 || current_function_calls_eh_return
5140 || current_function_uses_const_pool))
5142 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5147 if (current_function_calls_eh_return && maybe_eh_return)
5152 unsigned test = EH_RETURN_DATA_REGNO (i);
5153 if (test == INVALID_REGNUM)
5160 if (cfun->machine->force_align_arg_pointer
5161 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5164 return (regs_ever_live[regno]
5165 && !call_used_regs[regno]
5166 && !fixed_regs[regno]
5167 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5170 /* Return number of registers to be saved on the stack. */
5173 ix86_nsaved_regs (void)
5178 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5179 if (ix86_save_reg (regno, true))
5184 /* Return the offset between two registers, one to be eliminated, and the other
5185 its replacement, at the start of a routine. */
5188 ix86_initial_elimination_offset (int from, int to)
5190 struct ix86_frame frame;
5191 ix86_compute_frame_layout (&frame);
5193 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5194 return frame.hard_frame_pointer_offset;
5195 else if (from == FRAME_POINTER_REGNUM
5196 && to == HARD_FRAME_POINTER_REGNUM)
5197 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5200 gcc_assert (to == STACK_POINTER_REGNUM);
5202 if (from == ARG_POINTER_REGNUM)
5203 return frame.stack_pointer_offset;
5205 gcc_assert (from == FRAME_POINTER_REGNUM);
5206 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5210 /* Fill structure ix86_frame about frame of currently computed function. */
5213 ix86_compute_frame_layout (struct ix86_frame *frame)
5215 HOST_WIDE_INT total_size;
5216 unsigned int stack_alignment_needed;
5217 HOST_WIDE_INT offset;
5218 unsigned int preferred_alignment;
5219 HOST_WIDE_INT size = get_frame_size ();
5221 frame->nregs = ix86_nsaved_regs ();
5224 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5225 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5227 /* During reload iteration the amount of registers saved can change.
5228 Recompute the value as needed. Do not recompute when amount of registers
5229 didn't change as reload does multiple calls to the function and does not
5230 expect the decision to change within single iteration. */
5232 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5234 int count = frame->nregs;
5236 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5237 /* The fast prologue uses move instead of push to save registers. This
5238 is significantly longer, but also executes faster as modern hardware
5239 can execute the moves in parallel, but can't do that for push/pop.
5241 Be careful about choosing what prologue to emit: When function takes
5242 many instructions to execute we may use slow version as well as in
5243 case function is known to be outside hot spot (this is known with
5244 feedback only). Weight the size of function by number of registers
5245 to save as it is cheap to use one or two push instructions but very
5246 slow to use many of them. */
5248 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5249 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5250 || (flag_branch_probabilities
5251 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5252 cfun->machine->use_fast_prologue_epilogue = false;
5254 cfun->machine->use_fast_prologue_epilogue
5255 = !expensive_function_p (count);
5257 if (TARGET_PROLOGUE_USING_MOVE
5258 && cfun->machine->use_fast_prologue_epilogue)
5259 frame->save_regs_using_mov = true;
5261 frame->save_regs_using_mov = false;
5264 /* Skip return address and saved base pointer. */
5265 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5267 frame->hard_frame_pointer_offset = offset;
5269 /* Do some sanity checking of stack_alignment_needed and
5270 preferred_alignment, since i386 port is the only using those features
5271 that may break easily. */
5273 gcc_assert (!size || stack_alignment_needed);
5274 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5275 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5276 gcc_assert (stack_alignment_needed
5277 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5279 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5280 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5282 /* Register save area */
5283 offset += frame->nregs * UNITS_PER_WORD;
5286 if (ix86_save_varrargs_registers)
5288 offset += X86_64_VARARGS_SIZE;
5289 frame->va_arg_size = X86_64_VARARGS_SIZE;
5292 frame->va_arg_size = 0;
5294 /* Align start of frame for local function. */
5295 frame->padding1 = ((offset + stack_alignment_needed - 1)
5296 & -stack_alignment_needed) - offset;
5298 offset += frame->padding1;
5300 /* Frame pointer points here. */
5301 frame->frame_pointer_offset = offset;
5305 /* Add outgoing arguments area. Can be skipped if we eliminated
5306 all the function calls as dead code.
5307 Skipping is however impossible when function calls alloca. Alloca
5308 expander assumes that last current_function_outgoing_args_size
5309 of stack frame are unused. */
5310 if (ACCUMULATE_OUTGOING_ARGS
5311 && (!current_function_is_leaf || current_function_calls_alloca
5312 || ix86_current_function_calls_tls_descriptor))
5314 offset += current_function_outgoing_args_size;
5315 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5318 frame->outgoing_arguments_size = 0;
5320 /* Align stack boundary. Only needed if we're calling another function
5322 if (!current_function_is_leaf || current_function_calls_alloca
5323 || ix86_current_function_calls_tls_descriptor)
5324 frame->padding2 = ((offset + preferred_alignment - 1)
5325 & -preferred_alignment) - offset;
5327 frame->padding2 = 0;
5329 offset += frame->padding2;
5331 /* We've reached end of stack frame. */
5332 frame->stack_pointer_offset = offset;
5334 /* Size prologue needs to allocate. */
5335 frame->to_allocate =
5336 (size + frame->padding1 + frame->padding2
5337 + frame->outgoing_arguments_size + frame->va_arg_size);
5339 if ((!frame->to_allocate && frame->nregs <= 1)
5340 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5341 frame->save_regs_using_mov = false;
5343 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5344 && current_function_is_leaf
5345 && !ix86_current_function_calls_tls_descriptor)
5347 frame->red_zone_size = frame->to_allocate;
5348 if (frame->save_regs_using_mov)
5349 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5350 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5351 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5354 frame->red_zone_size = 0;
5355 frame->to_allocate -= frame->red_zone_size;
5356 frame->stack_pointer_offset -= frame->red_zone_size;
5358 fprintf (stderr, "nregs: %i\n", frame->nregs);
5359 fprintf (stderr, "size: %i\n", size);
5360 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5361 fprintf (stderr, "padding1: %i\n", frame->padding1);
5362 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5363 fprintf (stderr, "padding2: %i\n", frame->padding2);
5364 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5365 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5366 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5367 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5368 frame->hard_frame_pointer_offset);
5369 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5373 /* Emit code to save registers in the prologue. */
5376 ix86_emit_save_regs (void)
5381 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5382 if (ix86_save_reg (regno, true))
5384 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5385 RTX_FRAME_RELATED_P (insn) = 1;
5389 /* Emit code to save registers using MOV insns. First register
5390 is restored from POINTER + OFFSET. */
5392 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5397 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5398 if (ix86_save_reg (regno, true))
5400 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5402 gen_rtx_REG (Pmode, regno));
5403 RTX_FRAME_RELATED_P (insn) = 1;
5404 offset += UNITS_PER_WORD;
5408 /* Expand prologue or epilogue stack adjustment.
5409 The pattern exist to put a dependency on all ebp-based memory accesses.
5410 STYLE should be negative if instructions should be marked as frame related,
5411 zero if %r11 register is live and cannot be freely used and positive
5415 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5420 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5421 else if (x86_64_immediate_operand (offset, DImode))
5422 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5426 /* r11 is used by indirect sibcall return as well, set before the
5427 epilogue and used after the epilogue. ATM indirect sibcall
5428 shouldn't be used together with huge frame sizes in one
5429 function because of the frame_size check in sibcall.c. */
5431 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5432 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5434 RTX_FRAME_RELATED_P (insn) = 1;
5435 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5439 RTX_FRAME_RELATED_P (insn) = 1;
5442 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5445 ix86_internal_arg_pointer (void)
5447 bool has_force_align_arg_pointer =
5448 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5449 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5450 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5451 && DECL_NAME (current_function_decl)
5452 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5453 && DECL_FILE_SCOPE_P (current_function_decl))
5454 || ix86_force_align_arg_pointer
5455 || has_force_align_arg_pointer)
5457 /* Nested functions can't realign the stack due to a register
5459 if (DECL_CONTEXT (current_function_decl)
5460 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5462 if (ix86_force_align_arg_pointer)
5463 warning (0, "-mstackrealign ignored for nested functions");
5464 if (has_force_align_arg_pointer)
5465 error ("%s not supported for nested functions",
5466 ix86_force_align_arg_pointer_string);
5467 return virtual_incoming_args_rtx;
5469 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5470 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5473 return virtual_incoming_args_rtx;
5476 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5477 This is called from dwarf2out.c to emit call frame instructions
5478 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5480 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5482 rtx unspec = SET_SRC (pattern);
5483 gcc_assert (GET_CODE (unspec) == UNSPEC);
5487 case UNSPEC_REG_SAVE:
5488 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5489 SET_DEST (pattern));
5491 case UNSPEC_DEF_CFA:
5492 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5493 INTVAL (XVECEXP (unspec, 0, 0)));
5500 /* Expand the prologue into a bunch of separate insns. */
5503 ix86_expand_prologue (void)
5507 struct ix86_frame frame;
5508 HOST_WIDE_INT allocate;
5510 ix86_compute_frame_layout (&frame);
5512 if (cfun->machine->force_align_arg_pointer)
5516 /* Grab the argument pointer. */
5517 x = plus_constant (stack_pointer_rtx, 4);
5518 y = cfun->machine->force_align_arg_pointer;
5519 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5520 RTX_FRAME_RELATED_P (insn) = 1;
5522 /* The unwind info consists of two parts: install the fafp as the cfa,
5523 and record the fafp as the "save register" of the stack pointer.
5524 The later is there in order that the unwinder can see where it
5525 should restore the stack pointer across the and insn. */
5526 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5527 x = gen_rtx_SET (VOIDmode, y, x);
5528 RTX_FRAME_RELATED_P (x) = 1;
5529 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5531 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5532 RTX_FRAME_RELATED_P (y) = 1;
5533 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5534 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5535 REG_NOTES (insn) = x;
5537 /* Align the stack. */
5538 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5541 /* And here we cheat like madmen with the unwind info. We force the
5542 cfa register back to sp+4, which is exactly what it was at the
5543 start of the function. Re-pushing the return address results in
5544 the return at the same spot relative to the cfa, and thus is
5545 correct wrt the unwind info. */
5546 x = cfun->machine->force_align_arg_pointer;
5547 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5548 insn = emit_insn (gen_push (x));
5549 RTX_FRAME_RELATED_P (insn) = 1;
5552 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5553 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5554 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5555 REG_NOTES (insn) = x;
5558 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5559 slower on all targets. Also sdb doesn't like it. */
5561 if (frame_pointer_needed)
5563 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5564 RTX_FRAME_RELATED_P (insn) = 1;
5566 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5567 RTX_FRAME_RELATED_P (insn) = 1;
5570 allocate = frame.to_allocate;
5572 if (!frame.save_regs_using_mov)
5573 ix86_emit_save_regs ();
5575 allocate += frame.nregs * UNITS_PER_WORD;
5577 /* When using red zone we may start register saving before allocating
5578 the stack frame saving one cycle of the prologue. */
5579 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5580 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5581 : stack_pointer_rtx,
5582 -frame.nregs * UNITS_PER_WORD);
5586 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5587 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5588 GEN_INT (-allocate), -1);
5591 /* Only valid for Win32. */
5592 rtx eax = gen_rtx_REG (SImode, 0);
5593 bool eax_live = ix86_eax_live_at_start_p ();
5596 gcc_assert (!TARGET_64BIT);
5600 emit_insn (gen_push (eax));
5604 emit_move_insn (eax, GEN_INT (allocate));
5606 insn = emit_insn (gen_allocate_stack_worker (eax));
5607 RTX_FRAME_RELATED_P (insn) = 1;
5608 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5609 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5610 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5611 t, REG_NOTES (insn));
5615 if (frame_pointer_needed)
5616 t = plus_constant (hard_frame_pointer_rtx,
5619 - frame.nregs * UNITS_PER_WORD);
5621 t = plus_constant (stack_pointer_rtx, allocate);
5622 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5626 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5628 if (!frame_pointer_needed || !frame.to_allocate)
5629 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5631 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5632 -frame.nregs * UNITS_PER_WORD);
5635 pic_reg_used = false;
5636 if (pic_offset_table_rtx
5637 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5638 || current_function_profile))
5640 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5642 if (alt_pic_reg_used != INVALID_REGNUM)
5643 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5645 pic_reg_used = true;
5651 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5653 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5655 /* Even with accurate pre-reload life analysis, we can wind up
5656 deleting all references to the pic register after reload.
5657 Consider if cross-jumping unifies two sides of a branch
5658 controlled by a comparison vs the only read from a global.
5659 In which case, allow the set_got to be deleted, though we're
5660 too late to do anything about the ebx save in the prologue. */
5661 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5664 /* Prevent function calls from be scheduled before the call to mcount.
5665 In the pic_reg_used case, make sure that the got load isn't deleted. */
5666 if (current_function_profile)
5667 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5670 /* Emit code to restore saved registers using MOV insns. First register
5671 is restored from POINTER + OFFSET. */
5673 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5674 int maybe_eh_return)
5677 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5679 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5680 if (ix86_save_reg (regno, maybe_eh_return))
5682 /* Ensure that adjust_address won't be forced to produce pointer
5683 out of range allowed by x86-64 instruction set. */
5684 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5688 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5689 emit_move_insn (r11, GEN_INT (offset));
5690 emit_insn (gen_adddi3 (r11, r11, pointer));
5691 base_address = gen_rtx_MEM (Pmode, r11);
5694 emit_move_insn (gen_rtx_REG (Pmode, regno),
5695 adjust_address (base_address, Pmode, offset));
5696 offset += UNITS_PER_WORD;
5700 /* Restore function stack, frame, and registers. */
5703 ix86_expand_epilogue (int style)
5706 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5707 struct ix86_frame frame;
5708 HOST_WIDE_INT offset;
5710 ix86_compute_frame_layout (&frame);
5712 /* Calculate start of saved registers relative to ebp. Special care
5713 must be taken for the normal return case of a function using
5714 eh_return: the eax and edx registers are marked as saved, but not
5715 restored along this path. */
5716 offset = frame.nregs;
5717 if (current_function_calls_eh_return && style != 2)
5719 offset *= -UNITS_PER_WORD;
5721 /* If we're only restoring one register and sp is not valid then
5722 using a move instruction to restore the register since it's
5723 less work than reloading sp and popping the register.
5725 The default code result in stack adjustment using add/lea instruction,
5726 while this code results in LEAVE instruction (or discrete equivalent),
5727 so it is profitable in some other cases as well. Especially when there
5728 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5729 and there is exactly one register to pop. This heuristic may need some
5730 tuning in future. */
5731 if ((!sp_valid && frame.nregs <= 1)
5732 || (TARGET_EPILOGUE_USING_MOVE
5733 && cfun->machine->use_fast_prologue_epilogue
5734 && (frame.nregs > 1 || frame.to_allocate))
5735 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5736 || (frame_pointer_needed && TARGET_USE_LEAVE
5737 && cfun->machine->use_fast_prologue_epilogue
5738 && frame.nregs == 1)
5739 || current_function_calls_eh_return)
5741 /* Restore registers. We can use ebp or esp to address the memory
5742 locations. If both are available, default to ebp, since offsets
5743 are known to be small. Only exception is esp pointing directly to the
5744 end of block of saved registers, where we may simplify addressing
5747 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5748 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5749 frame.to_allocate, style == 2);
5751 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5752 offset, style == 2);
5754 /* eh_return epilogues need %ecx added to the stack pointer. */
5757 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5759 if (frame_pointer_needed)
5761 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5762 tmp = plus_constant (tmp, UNITS_PER_WORD);
5763 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5765 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5766 emit_move_insn (hard_frame_pointer_rtx, tmp);
5768 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5773 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5774 tmp = plus_constant (tmp, (frame.to_allocate
5775 + frame.nregs * UNITS_PER_WORD));
5776 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5779 else if (!frame_pointer_needed)
5780 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5781 GEN_INT (frame.to_allocate
5782 + frame.nregs * UNITS_PER_WORD),
5784 /* If not an i386, mov & pop is faster than "leave". */
5785 else if (TARGET_USE_LEAVE || optimize_size
5786 || !cfun->machine->use_fast_prologue_epilogue)
5787 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5790 pro_epilogue_adjust_stack (stack_pointer_rtx,
5791 hard_frame_pointer_rtx,
5794 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5796 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5801 /* First step is to deallocate the stack frame so that we can
5802 pop the registers. */
5805 gcc_assert (frame_pointer_needed);
5806 pro_epilogue_adjust_stack (stack_pointer_rtx,
5807 hard_frame_pointer_rtx,
5808 GEN_INT (offset), style);
5810 else if (frame.to_allocate)
5811 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5812 GEN_INT (frame.to_allocate), style);
5814 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5815 if (ix86_save_reg (regno, false))
5818 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5820 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5822 if (frame_pointer_needed)
5824 /* Leave results in shorter dependency chains on CPUs that are
5825 able to grok it fast. */
5826 if (TARGET_USE_LEAVE)
5827 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5828 else if (TARGET_64BIT)
5829 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5831 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5835 if (cfun->machine->force_align_arg_pointer)
5837 emit_insn (gen_addsi3 (stack_pointer_rtx,
5838 cfun->machine->force_align_arg_pointer,
5842 /* Sibcall epilogues don't want a return instruction. */
5846 if (current_function_pops_args && current_function_args_size)
5848 rtx popc = GEN_INT (current_function_pops_args);
5850 /* i386 can only pop 64K bytes. If asked to pop more, pop
5851 return address, do explicit add, and jump indirectly to the
5854 if (current_function_pops_args >= 65536)
5856 rtx ecx = gen_rtx_REG (SImode, 2);
5858 /* There is no "pascal" calling convention in 64bit ABI. */
5859 gcc_assert (!TARGET_64BIT);
5861 emit_insn (gen_popsi1 (ecx));
5862 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5863 emit_jump_insn (gen_return_indirect_internal (ecx));
5866 emit_jump_insn (gen_return_pop_internal (popc));
5869 emit_jump_insn (gen_return_internal ());
5872 /* Reset from the function's potential modifications. */
5875 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5876 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5878 if (pic_offset_table_rtx)
5879 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5881 /* Mach-O doesn't support labels at the end of objects, so if
5882 it looks like we might want one, insert a NOP. */
5884 rtx insn = get_last_insn ();
5887 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5888 insn = PREV_INSN (insn);
5892 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5893 fputs ("\tnop\n", file);
5899 /* Extract the parts of an RTL expression that is a valid memory address
5900 for an instruction. Return 0 if the structure of the address is
5901 grossly off. Return -1 if the address contains ASHIFT, so it is not
5902 strictly valid, but still used for computing length of lea instruction. */
5905 ix86_decompose_address (rtx addr, struct ix86_address *out)
5907 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5908 rtx base_reg, index_reg;
5909 HOST_WIDE_INT scale = 1;
5910 rtx scale_rtx = NULL_RTX;
5912 enum ix86_address_seg seg = SEG_DEFAULT;
5914 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5916 else if (GET_CODE (addr) == PLUS)
5926 addends[n++] = XEXP (op, 1);
5929 while (GET_CODE (op) == PLUS);
5934 for (i = n; i >= 0; --i)
5937 switch (GET_CODE (op))
5942 index = XEXP (op, 0);
5943 scale_rtx = XEXP (op, 1);
5947 if (XINT (op, 1) == UNSPEC_TP
5948 && TARGET_TLS_DIRECT_SEG_REFS
5949 && seg == SEG_DEFAULT)
5950 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5979 else if (GET_CODE (addr) == MULT)
5981 index = XEXP (addr, 0); /* index*scale */
5982 scale_rtx = XEXP (addr, 1);
5984 else if (GET_CODE (addr) == ASHIFT)
5988 /* We're called for lea too, which implements ashift on occasion. */
5989 index = XEXP (addr, 0);
5990 tmp = XEXP (addr, 1);
5991 if (GET_CODE (tmp) != CONST_INT)
5993 scale = INTVAL (tmp);
5994 if ((unsigned HOST_WIDE_INT) scale > 3)
6000 disp = addr; /* displacement */
6002 /* Extract the integral value of scale. */
6005 if (GET_CODE (scale_rtx) != CONST_INT)
6007 scale = INTVAL (scale_rtx);
6010 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6011 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6013 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6014 if (base_reg && index_reg && scale == 1
6015 && (index_reg == arg_pointer_rtx
6016 || index_reg == frame_pointer_rtx
6017 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6020 tmp = base, base = index, index = tmp;
6021 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6024 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6025 if ((base_reg == hard_frame_pointer_rtx
6026 || base_reg == frame_pointer_rtx
6027 || base_reg == arg_pointer_rtx) && !disp)
6030 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6031 Avoid this by transforming to [%esi+0]. */
6032 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6033 && base_reg && !index_reg && !disp
6035 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6038 /* Special case: encode reg+reg instead of reg*2. */
6039 if (!base && index && scale && scale == 2)
6040 base = index, base_reg = index_reg, scale = 1;
6042 /* Special case: scaling cannot be encoded without base or displacement. */
6043 if (!base && !disp && index && scale != 1)
6055 /* Return cost of the memory address x.
6056 For i386, it is better to use a complex address than let gcc copy
6057 the address into a reg and make a new pseudo. But not if the address
6058 requires to two regs - that would mean more pseudos with longer
6061 ix86_address_cost (rtx x)
6063 struct ix86_address parts;
6065 int ok = ix86_decompose_address (x, &parts);
6069 if (parts.base && GET_CODE (parts.base) == SUBREG)
6070 parts.base = SUBREG_REG (parts.base);
6071 if (parts.index && GET_CODE (parts.index) == SUBREG)
6072 parts.index = SUBREG_REG (parts.index);
6074 /* More complex memory references are better. */
6075 if (parts.disp && parts.disp != const0_rtx)
6077 if (parts.seg != SEG_DEFAULT)
6080 /* Attempt to minimize number of registers in the address. */
6082 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6084 && (!REG_P (parts.index)
6085 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6089 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6091 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6092 && parts.base != parts.index)
6095 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6096 since it's predecode logic can't detect the length of instructions
6097 and it degenerates to vector decoded. Increase cost of such
6098 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6099 to split such addresses or even refuse such addresses at all.
6101 Following addressing modes are affected:
6106 The first and last case may be avoidable by explicitly coding the zero in
6107 memory address, but I don't have AMD-K6 machine handy to check this
6111 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6112 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6113 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6119 /* If X is a machine specific address (i.e. a symbol or label being
6120 referenced as a displacement from the GOT implemented using an
6121 UNSPEC), then return the base term. Otherwise return X. */
6124 ix86_find_base_term (rtx x)
6130 if (GET_CODE (x) != CONST)
6133 if (GET_CODE (term) == PLUS
6134 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6135 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6136 term = XEXP (term, 0);
6137 if (GET_CODE (term) != UNSPEC
6138 || XINT (term, 1) != UNSPEC_GOTPCREL)
6141 term = XVECEXP (term, 0, 0);
6143 if (GET_CODE (term) != SYMBOL_REF
6144 && GET_CODE (term) != LABEL_REF)
6150 term = ix86_delegitimize_address (x);
6152 if (GET_CODE (term) != SYMBOL_REF
6153 && GET_CODE (term) != LABEL_REF)
6159 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6160 this is used for to form addresses to local data when -fPIC is in
6164 darwin_local_data_pic (rtx disp)
6166 if (GET_CODE (disp) == MINUS)
6168 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6169 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6170 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6172 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6173 if (! strcmp (sym_name, "<pic base>"))
6181 /* Determine if a given RTX is a valid constant. We already know this
6182 satisfies CONSTANT_P. */
6185 legitimate_constant_p (rtx x)
6187 switch (GET_CODE (x))
6192 if (GET_CODE (x) == PLUS)
6194 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6199 if (TARGET_MACHO && darwin_local_data_pic (x))
6202 /* Only some unspecs are valid as "constants". */
6203 if (GET_CODE (x) == UNSPEC)
6204 switch (XINT (x, 1))
6207 return TARGET_64BIT;
6210 x = XVECEXP (x, 0, 0);
6211 return (GET_CODE (x) == SYMBOL_REF
6212 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6214 x = XVECEXP (x, 0, 0);
6215 return (GET_CODE (x) == SYMBOL_REF
6216 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6221 /* We must have drilled down to a symbol. */
6222 if (GET_CODE (x) == LABEL_REF)
6224 if (GET_CODE (x) != SYMBOL_REF)
6229 /* TLS symbols are never valid. */
6230 if (SYMBOL_REF_TLS_MODEL (x))
6235 if (GET_MODE (x) == TImode
6236 && x != CONST0_RTX (TImode)
6242 if (x == CONST0_RTX (GET_MODE (x)))
6250 /* Otherwise we handle everything else in the move patterns. */
6254 /* Determine if it's legal to put X into the constant pool. This
6255 is not possible for the address of thread-local symbols, which
6256 is checked above. */
6259 ix86_cannot_force_const_mem (rtx x)
6261 /* We can always put integral constants and vectors in memory. */
6262 switch (GET_CODE (x))
6272 return !legitimate_constant_p (x);
6275 /* Determine if a given RTX is a valid constant address. */
6278 constant_address_p (rtx x)
6280 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6283 /* Nonzero if the constant value X is a legitimate general operand
6284 when generating PIC code. It is given that flag_pic is on and
6285 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6288 legitimate_pic_operand_p (rtx x)
6292 switch (GET_CODE (x))
6295 inner = XEXP (x, 0);
6296 if (GET_CODE (inner) == PLUS
6297 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6298 inner = XEXP (inner, 0);
6300 /* Only some unspecs are valid as "constants". */
6301 if (GET_CODE (inner) == UNSPEC)
6302 switch (XINT (inner, 1))
6305 return TARGET_64BIT;
6307 x = XVECEXP (inner, 0, 0);
6308 return (GET_CODE (x) == SYMBOL_REF
6309 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6317 return legitimate_pic_address_disp_p (x);
6324 /* Determine if a given CONST RTX is a valid memory displacement
6328 legitimate_pic_address_disp_p (rtx disp)
6332 /* In 64bit mode we can allow direct addresses of symbols and labels
6333 when they are not dynamic symbols. */
6336 rtx op0 = disp, op1;
6338 switch (GET_CODE (disp))
6344 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6346 op0 = XEXP (XEXP (disp, 0), 0);
6347 op1 = XEXP (XEXP (disp, 0), 1);
6348 if (GET_CODE (op1) != CONST_INT
6349 || INTVAL (op1) >= 16*1024*1024
6350 || INTVAL (op1) < -16*1024*1024)
6352 if (GET_CODE (op0) == LABEL_REF)
6354 if (GET_CODE (op0) != SYMBOL_REF)
6359 /* TLS references should always be enclosed in UNSPEC. */
6360 if (SYMBOL_REF_TLS_MODEL (op0))
6362 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6370 if (GET_CODE (disp) != CONST)
6372 disp = XEXP (disp, 0);
6376 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6377 of GOT tables. We should not need these anyway. */
6378 if (GET_CODE (disp) != UNSPEC
6379 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6380 && XINT (disp, 1) != UNSPEC_GOTOFF))
6383 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6384 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6390 if (GET_CODE (disp) == PLUS)
6392 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6394 disp = XEXP (disp, 0);
6398 if (TARGET_MACHO && darwin_local_data_pic (disp))
6401 if (GET_CODE (disp) != UNSPEC)
6404 switch (XINT (disp, 1))
6409 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6411 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6412 While ABI specify also 32bit relocation but we don't produce it in
6413 small PIC model at all. */
6414 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6415 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6417 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6419 case UNSPEC_GOTTPOFF:
6420 case UNSPEC_GOTNTPOFF:
6421 case UNSPEC_INDNTPOFF:
6424 disp = XVECEXP (disp, 0, 0);
6425 return (GET_CODE (disp) == SYMBOL_REF
6426 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6428 disp = XVECEXP (disp, 0, 0);
6429 return (GET_CODE (disp) == SYMBOL_REF
6430 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6432 disp = XVECEXP (disp, 0, 0);
6433 return (GET_CODE (disp) == SYMBOL_REF
6434 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6440 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6441 memory address for an instruction. The MODE argument is the machine mode
6442 for the MEM expression that wants to use this address.
6444 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6445 convert common non-canonical forms to canonical form so that they will
6449 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6451 struct ix86_address parts;
6452 rtx base, index, disp;
6453 HOST_WIDE_INT scale;
6454 const char *reason = NULL;
6455 rtx reason_rtx = NULL_RTX;
6457 if (TARGET_DEBUG_ADDR)
6460 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6461 GET_MODE_NAME (mode), strict);
6465 if (ix86_decompose_address (addr, &parts) <= 0)
6467 reason = "decomposition failed";
6472 index = parts.index;
6474 scale = parts.scale;
6476 /* Validate base register.
6478 Don't allow SUBREG's that span more than a word here. It can lead to spill
6479 failures when the base is one word out of a two word structure, which is
6480 represented internally as a DImode int. */
6489 else if (GET_CODE (base) == SUBREG
6490 && REG_P (SUBREG_REG (base))
6491 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6493 reg = SUBREG_REG (base);
6496 reason = "base is not a register";
6500 if (GET_MODE (base) != Pmode)
6502 reason = "base is not in Pmode";
6506 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6507 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6509 reason = "base is not valid";
6514 /* Validate index register.
6516 Don't allow SUBREG's that span more than a word here -- same as above. */
6525 else if (GET_CODE (index) == SUBREG
6526 && REG_P (SUBREG_REG (index))
6527 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6529 reg = SUBREG_REG (index);
6532 reason = "index is not a register";
6536 if (GET_MODE (index) != Pmode)
6538 reason = "index is not in Pmode";
6542 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6543 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6545 reason = "index is not valid";
6550 /* Validate scale factor. */
6553 reason_rtx = GEN_INT (scale);
6556 reason = "scale without index";
6560 if (scale != 2 && scale != 4 && scale != 8)
6562 reason = "scale is not a valid multiplier";
6567 /* Validate displacement. */
6572 if (GET_CODE (disp) == CONST
6573 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6574 switch (XINT (XEXP (disp, 0), 1))
6576 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6577 used. While ABI specify also 32bit relocations, we don't produce
6578 them at all and use IP relative instead. */
6581 gcc_assert (flag_pic);
6583 goto is_legitimate_pic;
6584 reason = "64bit address unspec";
6587 case UNSPEC_GOTPCREL:
6588 gcc_assert (flag_pic);
6589 goto is_legitimate_pic;
6591 case UNSPEC_GOTTPOFF:
6592 case UNSPEC_GOTNTPOFF:
6593 case UNSPEC_INDNTPOFF:
6599 reason = "invalid address unspec";
6603 else if (SYMBOLIC_CONST (disp)
6607 && MACHOPIC_INDIRECT
6608 && !machopic_operand_p (disp)
6614 if (TARGET_64BIT && (index || base))
6616 /* foo@dtpoff(%rX) is ok. */
6617 if (GET_CODE (disp) != CONST
6618 || GET_CODE (XEXP (disp, 0)) != PLUS
6619 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6620 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6621 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6622 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6624 reason = "non-constant pic memory reference";
6628 else if (! legitimate_pic_address_disp_p (disp))
6630 reason = "displacement is an invalid pic construct";
6634 /* This code used to verify that a symbolic pic displacement
6635 includes the pic_offset_table_rtx register.
6637 While this is good idea, unfortunately these constructs may
6638 be created by "adds using lea" optimization for incorrect
6647 This code is nonsensical, but results in addressing
6648 GOT table with pic_offset_table_rtx base. We can't
6649 just refuse it easily, since it gets matched by
6650 "addsi3" pattern, that later gets split to lea in the
6651 case output register differs from input. While this
6652 can be handled by separate addsi pattern for this case
6653 that never results in lea, this seems to be easier and
6654 correct fix for crash to disable this test. */
6656 else if (GET_CODE (disp) != LABEL_REF
6657 && GET_CODE (disp) != CONST_INT
6658 && (GET_CODE (disp) != CONST
6659 || !legitimate_constant_p (disp))
6660 && (GET_CODE (disp) != SYMBOL_REF
6661 || !legitimate_constant_p (disp)))
6663 reason = "displacement is not constant";
6666 else if (TARGET_64BIT
6667 && !x86_64_immediate_operand (disp, VOIDmode))
6669 reason = "displacement is out of range";
6674 /* Everything looks valid. */
6675 if (TARGET_DEBUG_ADDR)
6676 fprintf (stderr, "Success.\n");
6680 if (TARGET_DEBUG_ADDR)
6682 fprintf (stderr, "Error: %s\n", reason);
6683 debug_rtx (reason_rtx);
6688 /* Return a unique alias set for the GOT. */
6690 static HOST_WIDE_INT
6691 ix86_GOT_alias_set (void)
6693 static HOST_WIDE_INT set = -1;
6695 set = new_alias_set ();
6699 /* Return a legitimate reference for ORIG (an address) using the
6700 register REG. If REG is 0, a new pseudo is generated.
6702 There are two types of references that must be handled:
6704 1. Global data references must load the address from the GOT, via
6705 the PIC reg. An insn is emitted to do this load, and the reg is
6708 2. Static data references, constant pool addresses, and code labels
6709 compute the address as an offset from the GOT, whose base is in
6710 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6711 differentiate them from global data objects. The returned
6712 address is the PIC reg + an unspec constant.
6714 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6715 reg also appears in the address. */
6718 legitimize_pic_address (rtx orig, rtx reg)
6725 if (TARGET_MACHO && !TARGET_64BIT)
6728 reg = gen_reg_rtx (Pmode);
6729 /* Use the generic Mach-O PIC machinery. */
6730 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6734 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6736 else if (TARGET_64BIT
6737 && ix86_cmodel != CM_SMALL_PIC
6738 && local_symbolic_operand (addr, Pmode))
6741 /* This symbol may be referenced via a displacement from the PIC
6742 base address (@GOTOFF). */
6744 if (reload_in_progress)
6745 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6746 if (GET_CODE (addr) == CONST)
6747 addr = XEXP (addr, 0);
6748 if (GET_CODE (addr) == PLUS)
6750 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6751 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6754 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6755 new = gen_rtx_CONST (Pmode, new);
6757 tmpreg = gen_reg_rtx (Pmode);
6760 emit_move_insn (tmpreg, new);
6764 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6765 tmpreg, 1, OPTAB_DIRECT);
6768 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6770 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6772 /* This symbol may be referenced via a displacement from the PIC
6773 base address (@GOTOFF). */
6775 if (reload_in_progress)
6776 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6777 if (GET_CODE (addr) == CONST)
6778 addr = XEXP (addr, 0);
6779 if (GET_CODE (addr) == PLUS)
6781 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6782 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6785 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6786 new = gen_rtx_CONST (Pmode, new);
6787 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6791 emit_move_insn (reg, new);
6795 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6799 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6800 new = gen_rtx_CONST (Pmode, new);
6801 new = gen_const_mem (Pmode, new);
6802 set_mem_alias_set (new, ix86_GOT_alias_set ());
6805 reg = gen_reg_rtx (Pmode);
6806 /* Use directly gen_movsi, otherwise the address is loaded
6807 into register for CSE. We don't want to CSE this addresses,
6808 instead we CSE addresses from the GOT table, so skip this. */
6809 emit_insn (gen_movsi (reg, new));
6814 /* This symbol must be referenced via a load from the
6815 Global Offset Table (@GOT). */
6817 if (reload_in_progress)
6818 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6819 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6820 new = gen_rtx_CONST (Pmode, new);
6821 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6822 new = gen_const_mem (Pmode, new);
6823 set_mem_alias_set (new, ix86_GOT_alias_set ());
6826 reg = gen_reg_rtx (Pmode);
6827 emit_move_insn (reg, new);
6833 if (GET_CODE (addr) == CONST_INT
6834 && !x86_64_immediate_operand (addr, VOIDmode))
6838 emit_move_insn (reg, addr);
6842 new = force_reg (Pmode, addr);
6844 else if (GET_CODE (addr) == CONST)
6846 addr = XEXP (addr, 0);
6848 /* We must match stuff we generate before. Assume the only
6849 unspecs that can get here are ours. Not that we could do
6850 anything with them anyway.... */
6851 if (GET_CODE (addr) == UNSPEC
6852 || (GET_CODE (addr) == PLUS
6853 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6855 gcc_assert (GET_CODE (addr) == PLUS);
6857 if (GET_CODE (addr) == PLUS)
6859 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6861 /* Check first to see if this is a constant offset from a @GOTOFF
6862 symbol reference. */
6863 if (local_symbolic_operand (op0, Pmode)
6864 && GET_CODE (op1) == CONST_INT)
6868 if (reload_in_progress)
6869 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6870 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6872 new = gen_rtx_PLUS (Pmode, new, op1);
6873 new = gen_rtx_CONST (Pmode, new);
6874 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6878 emit_move_insn (reg, new);
6884 if (INTVAL (op1) < -16*1024*1024
6885 || INTVAL (op1) >= 16*1024*1024)
6887 if (!x86_64_immediate_operand (op1, Pmode))
6888 op1 = force_reg (Pmode, op1);
6889 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6895 base = legitimize_pic_address (XEXP (addr, 0), reg);
6896 new = legitimize_pic_address (XEXP (addr, 1),
6897 base == reg ? NULL_RTX : reg);
6899 if (GET_CODE (new) == CONST_INT)
6900 new = plus_constant (base, INTVAL (new));
6903 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6905 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6906 new = XEXP (new, 1);
6908 new = gen_rtx_PLUS (Pmode, base, new);
6916 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6919 get_thread_pointer (int to_reg)
6923 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6927 reg = gen_reg_rtx (Pmode);
6928 insn = gen_rtx_SET (VOIDmode, reg, tp);
6929 insn = emit_insn (insn);
6934 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6935 false if we expect this to be used for a memory address and true if
6936 we expect to load the address into a register. */
6939 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6941 rtx dest, base, off, pic, tp;
6946 case TLS_MODEL_GLOBAL_DYNAMIC:
6947 dest = gen_reg_rtx (Pmode);
6948 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6950 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6952 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6955 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6956 insns = get_insns ();
6959 emit_libcall_block (insns, dest, rax, x);
6961 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6962 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6964 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6966 if (TARGET_GNU2_TLS)
6968 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6970 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6974 case TLS_MODEL_LOCAL_DYNAMIC:
6975 base = gen_reg_rtx (Pmode);
6976 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6978 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6980 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6983 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6984 insns = get_insns ();
6987 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6988 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6989 emit_libcall_block (insns, base, rax, note);
6991 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6992 emit_insn (gen_tls_local_dynamic_base_64 (base));
6994 emit_insn (gen_tls_local_dynamic_base_32 (base));
6996 if (TARGET_GNU2_TLS)
6998 rtx x = ix86_tls_module_base ();
7000 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7001 gen_rtx_MINUS (Pmode, x, tp));
7004 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7005 off = gen_rtx_CONST (Pmode, off);
7007 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7009 if (TARGET_GNU2_TLS)
7011 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7013 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7018 case TLS_MODEL_INITIAL_EXEC:
7022 type = UNSPEC_GOTNTPOFF;
7026 if (reload_in_progress)
7027 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7028 pic = pic_offset_table_rtx;
7029 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7031 else if (!TARGET_ANY_GNU_TLS)
7033 pic = gen_reg_rtx (Pmode);
7034 emit_insn (gen_set_got (pic));
7035 type = UNSPEC_GOTTPOFF;
7040 type = UNSPEC_INDNTPOFF;
7043 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7044 off = gen_rtx_CONST (Pmode, off);
7046 off = gen_rtx_PLUS (Pmode, pic, off);
7047 off = gen_const_mem (Pmode, off);
7048 set_mem_alias_set (off, ix86_GOT_alias_set ());
7050 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7052 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7053 off = force_reg (Pmode, off);
7054 return gen_rtx_PLUS (Pmode, base, off);
7058 base = get_thread_pointer (true);
7059 dest = gen_reg_rtx (Pmode);
7060 emit_insn (gen_subsi3 (dest, base, off));
7064 case TLS_MODEL_LOCAL_EXEC:
7065 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7066 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7067 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7068 off = gen_rtx_CONST (Pmode, off);
7070 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7072 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7073 return gen_rtx_PLUS (Pmode, base, off);
7077 base = get_thread_pointer (true);
7078 dest = gen_reg_rtx (Pmode);
7079 emit_insn (gen_subsi3 (dest, base, off));
7090 /* Try machine-dependent ways of modifying an illegitimate address
7091 to be legitimate. If we find one, return the new, valid address.
7092 This macro is used in only one place: `memory_address' in explow.c.
7094 OLDX is the address as it was before break_out_memory_refs was called.
7095 In some cases it is useful to look at this to decide what needs to be done.
7097 MODE and WIN are passed so that this macro can use
7098 GO_IF_LEGITIMATE_ADDRESS.
7100 It is always safe for this macro to do nothing. It exists to recognize
7101 opportunities to optimize the output.
7103 For the 80386, we handle X+REG by loading X into a register R and
7104 using R+REG. R will go in a general reg and indexing will be used.
7105 However, if REG is a broken-out memory address or multiplication,
7106 nothing needs to be done because REG can certainly go in a general reg.
7108 When -fpic is used, special handling is needed for symbolic references.
7109 See comments by legitimize_pic_address in i386.c for details. */
7112 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7117 if (TARGET_DEBUG_ADDR)
7119 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7120 GET_MODE_NAME (mode));
7124 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7126 return legitimize_tls_address (x, log, false);
7127 if (GET_CODE (x) == CONST
7128 && GET_CODE (XEXP (x, 0)) == PLUS
7129 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7130 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7132 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7133 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7136 if (flag_pic && SYMBOLIC_CONST (x))
7137 return legitimize_pic_address (x, 0);
7139 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7140 if (GET_CODE (x) == ASHIFT
7141 && GET_CODE (XEXP (x, 1)) == CONST_INT
7142 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7145 log = INTVAL (XEXP (x, 1));
7146 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7147 GEN_INT (1 << log));
7150 if (GET_CODE (x) == PLUS)
7152 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7154 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7155 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7156 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7159 log = INTVAL (XEXP (XEXP (x, 0), 1));
7160 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7161 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7162 GEN_INT (1 << log));
7165 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7166 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7167 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7170 log = INTVAL (XEXP (XEXP (x, 1), 1));
7171 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7172 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7173 GEN_INT (1 << log));
7176 /* Put multiply first if it isn't already. */
7177 if (GET_CODE (XEXP (x, 1)) == MULT)
7179 rtx tmp = XEXP (x, 0);
7180 XEXP (x, 0) = XEXP (x, 1);
7185 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7186 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7187 created by virtual register instantiation, register elimination, and
7188 similar optimizations. */
7189 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7192 x = gen_rtx_PLUS (Pmode,
7193 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7194 XEXP (XEXP (x, 1), 0)),
7195 XEXP (XEXP (x, 1), 1));
7199 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7200 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7201 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7202 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7203 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7204 && CONSTANT_P (XEXP (x, 1)))
7207 rtx other = NULL_RTX;
7209 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7211 constant = XEXP (x, 1);
7212 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7214 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7216 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7217 other = XEXP (x, 1);
7225 x = gen_rtx_PLUS (Pmode,
7226 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7227 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7228 plus_constant (other, INTVAL (constant)));
7232 if (changed && legitimate_address_p (mode, x, FALSE))
7235 if (GET_CODE (XEXP (x, 0)) == MULT)
7238 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7241 if (GET_CODE (XEXP (x, 1)) == MULT)
7244 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7248 && GET_CODE (XEXP (x, 1)) == REG
7249 && GET_CODE (XEXP (x, 0)) == REG)
7252 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7255 x = legitimize_pic_address (x, 0);
7258 if (changed && legitimate_address_p (mode, x, FALSE))
7261 if (GET_CODE (XEXP (x, 0)) == REG)
7263 rtx temp = gen_reg_rtx (Pmode);
7264 rtx val = force_operand (XEXP (x, 1), temp);
7266 emit_move_insn (temp, val);
7272 else if (GET_CODE (XEXP (x, 1)) == REG)
7274 rtx temp = gen_reg_rtx (Pmode);
7275 rtx val = force_operand (XEXP (x, 0), temp);
7277 emit_move_insn (temp, val);
7287 /* Print an integer constant expression in assembler syntax. Addition
7288 and subtraction are the only arithmetic that may appear in these
7289 expressions. FILE is the stdio stream to write to, X is the rtx, and
7290 CODE is the operand print code from the output string. */
7293 output_pic_addr_const (FILE *file, rtx x, int code)
7297 switch (GET_CODE (x))
7300 gcc_assert (flag_pic);
7305 output_addr_const (file, x);
7306 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7307 fputs ("@PLT", file);
7314 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7315 assemble_name (asm_out_file, buf);
7319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7323 /* This used to output parentheses around the expression,
7324 but that does not work on the 386 (either ATT or BSD assembler). */
7325 output_pic_addr_const (file, XEXP (x, 0), code);
7329 if (GET_MODE (x) == VOIDmode)
7331 /* We can use %d if the number is <32 bits and positive. */
7332 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7333 fprintf (file, "0x%lx%08lx",
7334 (unsigned long) CONST_DOUBLE_HIGH (x),
7335 (unsigned long) CONST_DOUBLE_LOW (x));
7337 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7340 /* We can't handle floating point constants;
7341 PRINT_OPERAND must handle them. */
7342 output_operand_lossage ("floating constant misused");
7346 /* Some assemblers need integer constants to appear first. */
7347 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7349 output_pic_addr_const (file, XEXP (x, 0), code);
7351 output_pic_addr_const (file, XEXP (x, 1), code);
7355 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7356 output_pic_addr_const (file, XEXP (x, 1), code);
7358 output_pic_addr_const (file, XEXP (x, 0), code);
7364 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7365 output_pic_addr_const (file, XEXP (x, 0), code);
7367 output_pic_addr_const (file, XEXP (x, 1), code);
7369 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7373 gcc_assert (XVECLEN (x, 0) == 1);
7374 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7375 switch (XINT (x, 1))
7378 fputs ("@GOT", file);
7381 fputs ("@GOTOFF", file);
7383 case UNSPEC_GOTPCREL:
7384 fputs ("@GOTPCREL(%rip)", file);
7386 case UNSPEC_GOTTPOFF:
7387 /* FIXME: This might be @TPOFF in Sun ld too. */
7388 fputs ("@GOTTPOFF", file);
7391 fputs ("@TPOFF", file);
7395 fputs ("@TPOFF", file);
7397 fputs ("@NTPOFF", file);
7400 fputs ("@DTPOFF", file);
7402 case UNSPEC_GOTNTPOFF:
7404 fputs ("@GOTTPOFF(%rip)", file);
7406 fputs ("@GOTNTPOFF", file);
7408 case UNSPEC_INDNTPOFF:
7409 fputs ("@INDNTPOFF", file);
7412 output_operand_lossage ("invalid UNSPEC as operand");
7418 output_operand_lossage ("invalid expression as operand");
7422 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7423 We need to emit DTP-relative relocations. */
7426 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7428 fputs (ASM_LONG, file);
7429 output_addr_const (file, x);
7430 fputs ("@DTPOFF", file);
7436 fputs (", 0", file);
7443 /* In the name of slightly smaller debug output, and to cater to
7444 general assembler lossage, recognize PIC+GOTOFF and turn it back
7445 into a direct symbol reference.
7447 On Darwin, this is necessary to avoid a crash, because Darwin
7448 has a different PIC label for each routine but the DWARF debugging
7449 information is not associated with any particular routine, so it's
7450 necessary to remove references to the PIC label from RTL stored by
7451 the DWARF output code. */
7454 ix86_delegitimize_address (rtx orig_x)
7457 /* reg_addend is NULL or a multiple of some register. */
7458 rtx reg_addend = NULL_RTX;
7459 /* const_addend is NULL or a const_int. */
7460 rtx const_addend = NULL_RTX;
7461 /* This is the result, or NULL. */
7462 rtx result = NULL_RTX;
7464 if (GET_CODE (x) == MEM)
7469 if (GET_CODE (x) != CONST
7470 || GET_CODE (XEXP (x, 0)) != UNSPEC
7471 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7472 || GET_CODE (orig_x) != MEM)
7474 return XVECEXP (XEXP (x, 0), 0, 0);
7477 if (GET_CODE (x) != PLUS
7478 || GET_CODE (XEXP (x, 1)) != CONST)
7481 if (GET_CODE (XEXP (x, 0)) == REG
7482 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7483 /* %ebx + GOT/GOTOFF */
7485 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7487 /* %ebx + %reg * scale + GOT/GOTOFF */
7488 reg_addend = XEXP (x, 0);
7489 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7490 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7491 reg_addend = XEXP (reg_addend, 1);
7492 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7493 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7494 reg_addend = XEXP (reg_addend, 0);
7497 if (GET_CODE (reg_addend) != REG
7498 && GET_CODE (reg_addend) != MULT
7499 && GET_CODE (reg_addend) != ASHIFT)
7505 x = XEXP (XEXP (x, 1), 0);
7506 if (GET_CODE (x) == PLUS
7507 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7509 const_addend = XEXP (x, 1);
7513 if (GET_CODE (x) == UNSPEC
7514 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7515 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7516 result = XVECEXP (x, 0, 0);
7518 if (TARGET_MACHO && darwin_local_data_pic (x)
7519 && GET_CODE (orig_x) != MEM)
7520 result = XEXP (x, 0);
7526 result = gen_rtx_PLUS (Pmode, result, const_addend);
7528 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7533 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7538 if (mode == CCFPmode || mode == CCFPUmode)
7540 enum rtx_code second_code, bypass_code;
7541 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7542 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7543 code = ix86_fp_compare_code_to_integer (code);
7547 code = reverse_condition (code);
7558 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7562 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7563 Those same assemblers have the same but opposite lossage on cmov. */
7564 gcc_assert (mode == CCmode);
7565 suffix = fp ? "nbe" : "a";
7585 gcc_assert (mode == CCmode);
7607 gcc_assert (mode == CCmode);
7608 suffix = fp ? "nb" : "ae";
7611 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7615 gcc_assert (mode == CCmode);
7619 suffix = fp ? "u" : "p";
7622 suffix = fp ? "nu" : "np";
7627 fputs (suffix, file);
7630 /* Print the name of register X to FILE based on its machine mode and number.
7631 If CODE is 'w', pretend the mode is HImode.
7632 If CODE is 'b', pretend the mode is QImode.
7633 If CODE is 'k', pretend the mode is SImode.
7634 If CODE is 'q', pretend the mode is DImode.
7635 If CODE is 'h', pretend the reg is the 'high' byte register.
7636 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7639 print_reg (rtx x, int code, FILE *file)
7641 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7642 && REGNO (x) != FRAME_POINTER_REGNUM
7643 && REGNO (x) != FLAGS_REG
7644 && REGNO (x) != FPSR_REG
7645 && REGNO (x) != FPCR_REG);
7647 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7650 if (code == 'w' || MMX_REG_P (x))
7652 else if (code == 'b')
7654 else if (code == 'k')
7656 else if (code == 'q')
7658 else if (code == 'y')
7660 else if (code == 'h')
7663 code = GET_MODE_SIZE (GET_MODE (x));
7665 /* Irritatingly, AMD extended registers use different naming convention
7666 from the normal registers. */
7667 if (REX_INT_REG_P (x))
7669 gcc_assert (TARGET_64BIT);
7673 error ("extended registers have no high halves");
7676 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7679 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7682 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7685 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7688 error ("unsupported operand size for extended register");
7696 if (STACK_TOP_P (x))
7698 fputs ("st(0)", file);
7705 if (! ANY_FP_REG_P (x))
7706 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7711 fputs (hi_reg_name[REGNO (x)], file);
7714 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7716 fputs (qi_reg_name[REGNO (x)], file);
7719 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7721 fputs (qi_high_reg_name[REGNO (x)], file);
7728 /* Locate some local-dynamic symbol still in use by this function
7729 so that we can print its name in some tls_local_dynamic_base
7733 get_some_local_dynamic_name (void)
7737 if (cfun->machine->some_ld_name)
7738 return cfun->machine->some_ld_name;
7740 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7742 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7743 return cfun->machine->some_ld_name;
7749 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7753 if (GET_CODE (x) == SYMBOL_REF
7754 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7756 cfun->machine->some_ld_name = XSTR (x, 0);
7764 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7765 C -- print opcode suffix for set/cmov insn.
7766 c -- like C, but print reversed condition
7767 F,f -- likewise, but for floating-point.
7768 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7770 R -- print the prefix for register names.
7771 z -- print the opcode suffix for the size of the current operand.
7772 * -- print a star (in certain assembler syntax)
7773 A -- print an absolute memory reference.
7774 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7775 s -- print a shift double count, followed by the assemblers argument
7777 b -- print the QImode name of the register for the indicated operand.
7778 %b0 would print %al if operands[0] is reg 0.
7779 w -- likewise, print the HImode name of the register.
7780 k -- likewise, print the SImode name of the register.
7781 q -- likewise, print the DImode name of the register.
7782 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7783 y -- print "st(0)" instead of "st" as a register.
7784 D -- print condition for SSE cmp instruction.
7785 P -- if PIC, print an @PLT suffix.
7786 X -- don't print any sort of PIC '@' suffix for a symbol.
7787 & -- print some in-use local-dynamic symbol name.
7788 H -- print a memory address offset by 8; used for sse high-parts
7792 print_operand (FILE *file, rtx x, int code)
7799 if (ASSEMBLER_DIALECT == ASM_ATT)
7804 assemble_name (file, get_some_local_dynamic_name ());
7808 switch (ASSEMBLER_DIALECT)
7815 /* Intel syntax. For absolute addresses, registers should not
7816 be surrounded by braces. */
7817 if (GET_CODE (x) != REG)
7820 PRINT_OPERAND (file, x, 0);
7830 PRINT_OPERAND (file, x, 0);
7835 if (ASSEMBLER_DIALECT == ASM_ATT)
7840 if (ASSEMBLER_DIALECT == ASM_ATT)
7845 if (ASSEMBLER_DIALECT == ASM_ATT)
7850 if (ASSEMBLER_DIALECT == ASM_ATT)
7855 if (ASSEMBLER_DIALECT == ASM_ATT)
7860 if (ASSEMBLER_DIALECT == ASM_ATT)
7865 /* 387 opcodes don't get size suffixes if the operands are
7867 if (STACK_REG_P (x))
7870 /* Likewise if using Intel opcodes. */
7871 if (ASSEMBLER_DIALECT == ASM_INTEL)
7874 /* This is the size of op from size of operand. */
7875 switch (GET_MODE_SIZE (GET_MODE (x)))
7878 #ifdef HAVE_GAS_FILDS_FISTS
7884 if (GET_MODE (x) == SFmode)
7899 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7901 #ifdef GAS_MNEMONICS
7927 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7929 PRINT_OPERAND (file, x, 0);
7935 /* Little bit of braindamage here. The SSE compare instructions
7936 does use completely different names for the comparisons that the
7937 fp conditional moves. */
7938 switch (GET_CODE (x))
7953 fputs ("unord", file);
7957 fputs ("neq", file);
7961 fputs ("nlt", file);
7965 fputs ("nle", file);
7968 fputs ("ord", file);
7975 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7976 if (ASSEMBLER_DIALECT == ASM_ATT)
7978 switch (GET_MODE (x))
7980 case HImode: putc ('w', file); break;
7982 case SFmode: putc ('l', file); break;
7984 case DFmode: putc ('q', file); break;
7985 default: gcc_unreachable ();
7992 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7995 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7996 if (ASSEMBLER_DIALECT == ASM_ATT)
7999 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8002 /* Like above, but reverse condition */
8004 /* Check to see if argument to %c is really a constant
8005 and not a condition code which needs to be reversed. */
8006 if (!COMPARISON_P (x))
8008 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8011 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8014 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8015 if (ASSEMBLER_DIALECT == ASM_ATT)
8018 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8022 /* It doesn't actually matter what mode we use here, as we're
8023 only going to use this for printing. */
8024 x = adjust_address_nv (x, DImode, 8);
8031 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8034 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8037 int pred_val = INTVAL (XEXP (x, 0));
8039 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8040 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8042 int taken = pred_val > REG_BR_PROB_BASE / 2;
8043 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8045 /* Emit hints only in the case default branch prediction
8046 heuristics would fail. */
8047 if (taken != cputaken)
8049 /* We use 3e (DS) prefix for taken branches and
8050 2e (CS) prefix for not taken branches. */
8052 fputs ("ds ; ", file);
8054 fputs ("cs ; ", file);
8061 output_operand_lossage ("invalid operand code '%c'", code);
8065 if (GET_CODE (x) == REG)
8066 print_reg (x, code, file);
8068 else if (GET_CODE (x) == MEM)
8070 /* No `byte ptr' prefix for call instructions. */
8071 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8074 switch (GET_MODE_SIZE (GET_MODE (x)))
8076 case 1: size = "BYTE"; break;
8077 case 2: size = "WORD"; break;
8078 case 4: size = "DWORD"; break;
8079 case 8: size = "QWORD"; break;
8080 case 12: size = "XWORD"; break;
8081 case 16: size = "XMMWORD"; break;
8086 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8089 else if (code == 'w')
8091 else if (code == 'k')
8095 fputs (" PTR ", file);
8099 /* Avoid (%rip) for call operands. */
8100 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8101 && GET_CODE (x) != CONST_INT)
8102 output_addr_const (file, x);
8103 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8104 output_operand_lossage ("invalid constraints for operand");
8109 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8114 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8115 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8117 if (ASSEMBLER_DIALECT == ASM_ATT)
8119 fprintf (file, "0x%08lx", l);
8122 /* These float cases don't actually occur as immediate operands. */
8123 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8127 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8128 fprintf (file, "%s", dstr);
8131 else if (GET_CODE (x) == CONST_DOUBLE
8132 && GET_MODE (x) == XFmode)
8136 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8137 fprintf (file, "%s", dstr);
8142 /* We have patterns that allow zero sets of memory, for instance.
8143 In 64-bit mode, we should probably support all 8-byte vectors,
8144 since we can in fact encode that into an immediate. */
8145 if (GET_CODE (x) == CONST_VECTOR)
8147 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8153 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8155 if (ASSEMBLER_DIALECT == ASM_ATT)
8158 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8159 || GET_CODE (x) == LABEL_REF)
8161 if (ASSEMBLER_DIALECT == ASM_ATT)
8164 fputs ("OFFSET FLAT:", file);
8167 if (GET_CODE (x) == CONST_INT)
8168 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8170 output_pic_addr_const (file, x, code);
8172 output_addr_const (file, x);
8176 /* Print a memory operand whose address is ADDR. */
8179 print_operand_address (FILE *file, rtx addr)
8181 struct ix86_address parts;
8182 rtx base, index, disp;
8184 int ok = ix86_decompose_address (addr, &parts);
8189 index = parts.index;
8191 scale = parts.scale;
8199 if (USER_LABEL_PREFIX[0] == 0)
8201 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8207 if (!base && !index)
8209 /* Displacement only requires special attention. */
8211 if (GET_CODE (disp) == CONST_INT)
8213 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8215 if (USER_LABEL_PREFIX[0] == 0)
8217 fputs ("ds:", file);
8219 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8222 output_pic_addr_const (file, disp, 0);
8224 output_addr_const (file, disp);
8226 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8229 if (GET_CODE (disp) == CONST
8230 && GET_CODE (XEXP (disp, 0)) == PLUS
8231 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8232 disp = XEXP (XEXP (disp, 0), 0);
8233 if (GET_CODE (disp) == LABEL_REF
8234 || (GET_CODE (disp) == SYMBOL_REF
8235 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8236 fputs ("(%rip)", file);
8241 if (ASSEMBLER_DIALECT == ASM_ATT)
8246 output_pic_addr_const (file, disp, 0);
8247 else if (GET_CODE (disp) == LABEL_REF)
8248 output_asm_label (disp);
8250 output_addr_const (file, disp);
8255 print_reg (base, 0, file);
8259 print_reg (index, 0, file);
8261 fprintf (file, ",%d", scale);
8267 rtx offset = NULL_RTX;
8271 /* Pull out the offset of a symbol; print any symbol itself. */
8272 if (GET_CODE (disp) == CONST
8273 && GET_CODE (XEXP (disp, 0)) == PLUS
8274 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8276 offset = XEXP (XEXP (disp, 0), 1);
8277 disp = gen_rtx_CONST (VOIDmode,
8278 XEXP (XEXP (disp, 0), 0));
8282 output_pic_addr_const (file, disp, 0);
8283 else if (GET_CODE (disp) == LABEL_REF)
8284 output_asm_label (disp);
8285 else if (GET_CODE (disp) == CONST_INT)
8288 output_addr_const (file, disp);
8294 print_reg (base, 0, file);
8297 if (INTVAL (offset) >= 0)
8299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8303 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8310 print_reg (index, 0, file);
8312 fprintf (file, "*%d", scale);
8320 output_addr_const_extra (FILE *file, rtx x)
8324 if (GET_CODE (x) != UNSPEC)
8327 op = XVECEXP (x, 0, 0);
8328 switch (XINT (x, 1))
8330 case UNSPEC_GOTTPOFF:
8331 output_addr_const (file, op);
8332 /* FIXME: This might be @TPOFF in Sun ld. */
8333 fputs ("@GOTTPOFF", file);
8336 output_addr_const (file, op);
8337 fputs ("@TPOFF", file);
8340 output_addr_const (file, op);
8342 fputs ("@TPOFF", file);
8344 fputs ("@NTPOFF", file);
8347 output_addr_const (file, op);
8348 fputs ("@DTPOFF", file);
8350 case UNSPEC_GOTNTPOFF:
8351 output_addr_const (file, op);
8353 fputs ("@GOTTPOFF(%rip)", file);
8355 fputs ("@GOTNTPOFF", file);
8357 case UNSPEC_INDNTPOFF:
8358 output_addr_const (file, op);
8359 fputs ("@INDNTPOFF", file);
8369 /* Split one or more DImode RTL references into pairs of SImode
8370 references. The RTL can be REG, offsettable MEM, integer constant, or
8371 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8372 split and "num" is its length. lo_half and hi_half are output arrays
8373 that parallel "operands". */
8376 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8380 rtx op = operands[num];
8382 /* simplify_subreg refuse to split volatile memory addresses,
8383 but we still have to handle it. */
8384 if (GET_CODE (op) == MEM)
8386 lo_half[num] = adjust_address (op, SImode, 0);
8387 hi_half[num] = adjust_address (op, SImode, 4);
8391 lo_half[num] = simplify_gen_subreg (SImode, op,
8392 GET_MODE (op) == VOIDmode
8393 ? DImode : GET_MODE (op), 0);
8394 hi_half[num] = simplify_gen_subreg (SImode, op,
8395 GET_MODE (op) == VOIDmode
8396 ? DImode : GET_MODE (op), 4);
8400 /* Split one or more TImode RTL references into pairs of DImode
8401 references. The RTL can be REG, offsettable MEM, integer constant, or
8402 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8403 split and "num" is its length. lo_half and hi_half are output arrays
8404 that parallel "operands". */
8407 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8411 rtx op = operands[num];
8413 /* simplify_subreg refuse to split volatile memory addresses, but we
8414 still have to handle it. */
8415 if (GET_CODE (op) == MEM)
8417 lo_half[num] = adjust_address (op, DImode, 0);
8418 hi_half[num] = adjust_address (op, DImode, 8);
8422 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8423 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8428 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8429 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8430 is the expression of the binary operation. The output may either be
8431 emitted here, or returned to the caller, like all output_* functions.
8433 There is no guarantee that the operands are the same mode, as they
8434 might be within FLOAT or FLOAT_EXTEND expressions. */
8436 #ifndef SYSV386_COMPAT
8437 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8438 wants to fix the assemblers because that causes incompatibility
8439 with gcc. No-one wants to fix gcc because that causes
8440 incompatibility with assemblers... You can use the option of
8441 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8442 #define SYSV386_COMPAT 1
8446 output_387_binary_op (rtx insn, rtx *operands)
8448 static char buf[30];
8451 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8453 #ifdef ENABLE_CHECKING
8454 /* Even if we do not want to check the inputs, this documents input
8455 constraints. Which helps in understanding the following code. */
8456 if (STACK_REG_P (operands[0])
8457 && ((REG_P (operands[1])
8458 && REGNO (operands[0]) == REGNO (operands[1])
8459 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8460 || (REG_P (operands[2])
8461 && REGNO (operands[0]) == REGNO (operands[2])
8462 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8463 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8466 gcc_assert (is_sse);
8469 switch (GET_CODE (operands[3]))
8472 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8473 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8481 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8482 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8490 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8491 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8499 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8500 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8514 if (GET_MODE (operands[0]) == SFmode)
8515 strcat (buf, "ss\t{%2, %0|%0, %2}");
8517 strcat (buf, "sd\t{%2, %0|%0, %2}");
8522 switch (GET_CODE (operands[3]))
8526 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8528 rtx temp = operands[2];
8529 operands[2] = operands[1];
8533 /* know operands[0] == operands[1]. */
8535 if (GET_CODE (operands[2]) == MEM)
8541 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8543 if (STACK_TOP_P (operands[0]))
8544 /* How is it that we are storing to a dead operand[2]?
8545 Well, presumably operands[1] is dead too. We can't
8546 store the result to st(0) as st(0) gets popped on this
8547 instruction. Instead store to operands[2] (which I
8548 think has to be st(1)). st(1) will be popped later.
8549 gcc <= 2.8.1 didn't have this check and generated
8550 assembly code that the Unixware assembler rejected. */
8551 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8553 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8557 if (STACK_TOP_P (operands[0]))
8558 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8560 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8565 if (GET_CODE (operands[1]) == MEM)
8571 if (GET_CODE (operands[2]) == MEM)
8577 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8580 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8581 derived assemblers, confusingly reverse the direction of
8582 the operation for fsub{r} and fdiv{r} when the
8583 destination register is not st(0). The Intel assembler
8584 doesn't have this brain damage. Read !SYSV386_COMPAT to
8585 figure out what the hardware really does. */
8586 if (STACK_TOP_P (operands[0]))
8587 p = "{p\t%0, %2|rp\t%2, %0}";
8589 p = "{rp\t%2, %0|p\t%0, %2}";
8591 if (STACK_TOP_P (operands[0]))
8592 /* As above for fmul/fadd, we can't store to st(0). */
8593 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8595 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8600 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8603 if (STACK_TOP_P (operands[0]))
8604 p = "{rp\t%0, %1|p\t%1, %0}";
8606 p = "{p\t%1, %0|rp\t%0, %1}";
8608 if (STACK_TOP_P (operands[0]))
8609 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8611 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8616 if (STACK_TOP_P (operands[0]))
8618 if (STACK_TOP_P (operands[1]))
8619 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8621 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8624 else if (STACK_TOP_P (operands[1]))
8627 p = "{\t%1, %0|r\t%0, %1}";
8629 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8635 p = "{r\t%2, %0|\t%0, %2}";
8637 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8650 /* Return needed mode for entity in optimize_mode_switching pass. */
8653 ix86_mode_needed (int entity, rtx insn)
8655 enum attr_i387_cw mode;
8657 /* The mode UNINITIALIZED is used to store control word after a
8658 function call or ASM pattern. The mode ANY specify that function
8659 has no requirements on the control word and make no changes in the
8660 bits we are interested in. */
8663 || (NONJUMP_INSN_P (insn)
8664 && (asm_noperands (PATTERN (insn)) >= 0
8665 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8666 return I387_CW_UNINITIALIZED;
8668 if (recog_memoized (insn) < 0)
8671 mode = get_attr_i387_cw (insn);
8676 if (mode == I387_CW_TRUNC)
8681 if (mode == I387_CW_FLOOR)
8686 if (mode == I387_CW_CEIL)
8691 if (mode == I387_CW_MASK_PM)
8702 /* Output code to initialize control word copies used by trunc?f?i and
8703 rounding patterns. CURRENT_MODE is set to current control word,
8704 while NEW_MODE is set to new control word. */
8707 emit_i387_cw_initialization (int mode)
8709 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8714 rtx reg = gen_reg_rtx (HImode);
8716 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8717 emit_move_insn (reg, copy_rtx (stored_mode));
8719 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8724 /* round toward zero (truncate) */
8725 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8726 slot = SLOT_CW_TRUNC;
8730 /* round down toward -oo */
8731 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8732 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8733 slot = SLOT_CW_FLOOR;
8737 /* round up toward +oo */
8738 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8739 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8740 slot = SLOT_CW_CEIL;
8743 case I387_CW_MASK_PM:
8744 /* mask precision exception for nearbyint() */
8745 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8746 slot = SLOT_CW_MASK_PM;
8758 /* round toward zero (truncate) */
8759 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8760 slot = SLOT_CW_TRUNC;
8764 /* round down toward -oo */
8765 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8766 slot = SLOT_CW_FLOOR;
8770 /* round up toward +oo */
8771 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8772 slot = SLOT_CW_CEIL;
8775 case I387_CW_MASK_PM:
8776 /* mask precision exception for nearbyint() */
8777 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8778 slot = SLOT_CW_MASK_PM;
8786 gcc_assert (slot < MAX_386_STACK_LOCALS);
8788 new_mode = assign_386_stack_local (HImode, slot);
8789 emit_move_insn (new_mode, reg);
8792 /* Output code for INSN to convert a float to a signed int. OPERANDS
8793 are the insn operands. The output may be [HSD]Imode and the input
8794 operand may be [SDX]Fmode. */
8797 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8799 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8800 int dimode_p = GET_MODE (operands[0]) == DImode;
8801 int round_mode = get_attr_i387_cw (insn);
8803 /* Jump through a hoop or two for DImode, since the hardware has no
8804 non-popping instruction. We used to do this a different way, but
8805 that was somewhat fragile and broke with post-reload splitters. */
8806 if ((dimode_p || fisttp) && !stack_top_dies)
8807 output_asm_insn ("fld\t%y1", operands);
8809 gcc_assert (STACK_TOP_P (operands[1]));
8810 gcc_assert (GET_CODE (operands[0]) == MEM);
8813 output_asm_insn ("fisttp%z0\t%0", operands);
8816 if (round_mode != I387_CW_ANY)
8817 output_asm_insn ("fldcw\t%3", operands);
8818 if (stack_top_dies || dimode_p)
8819 output_asm_insn ("fistp%z0\t%0", operands);
8821 output_asm_insn ("fist%z0\t%0", operands);
8822 if (round_mode != I387_CW_ANY)
8823 output_asm_insn ("fldcw\t%2", operands);
8829 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8830 have the values zero or one, indicates the ffreep insn's operand
8831 from the OPERANDS array. */
8834 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8836 if (TARGET_USE_FFREEP)
8837 #if HAVE_AS_IX86_FFREEP
8838 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8841 static char retval[] = ".word\t0xc_df";
8842 int regno = REGNO (operands[opno]);
8844 gcc_assert (FP_REGNO_P (regno));
8846 retval[9] = '0' + (regno - FIRST_STACK_REG);
8851 return opno ? "fstp\t%y1" : "fstp\t%y0";
8855 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8856 should be used. UNORDERED_P is true when fucom should be used. */
8859 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8862 rtx cmp_op0, cmp_op1;
8863 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8867 cmp_op0 = operands[0];
8868 cmp_op1 = operands[1];
8872 cmp_op0 = operands[1];
8873 cmp_op1 = operands[2];
8878 if (GET_MODE (operands[0]) == SFmode)
8880 return "ucomiss\t{%1, %0|%0, %1}";
8882 return "comiss\t{%1, %0|%0, %1}";
8885 return "ucomisd\t{%1, %0|%0, %1}";
8887 return "comisd\t{%1, %0|%0, %1}";
8890 gcc_assert (STACK_TOP_P (cmp_op0));
8892 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8894 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8898 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8899 return output_387_ffreep (operands, 1);
8902 return "ftst\n\tfnstsw\t%0";
8905 if (STACK_REG_P (cmp_op1)
8907 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8908 && REGNO (cmp_op1) != FIRST_STACK_REG)
8910 /* If both the top of the 387 stack dies, and the other operand
8911 is also a stack register that dies, then this must be a
8912 `fcompp' float compare */
8916 /* There is no double popping fcomi variant. Fortunately,
8917 eflags is immune from the fstp's cc clobbering. */
8919 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8921 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8922 return output_387_ffreep (operands, 0);
8927 return "fucompp\n\tfnstsw\t%0";
8929 return "fcompp\n\tfnstsw\t%0";
8934 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8936 static const char * const alt[16] =
8938 "fcom%z2\t%y2\n\tfnstsw\t%0",
8939 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8940 "fucom%z2\t%y2\n\tfnstsw\t%0",
8941 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8943 "ficom%z2\t%y2\n\tfnstsw\t%0",
8944 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8948 "fcomi\t{%y1, %0|%0, %y1}",
8949 "fcomip\t{%y1, %0|%0, %y1}",
8950 "fucomi\t{%y1, %0|%0, %y1}",
8951 "fucomip\t{%y1, %0|%0, %y1}",
8962 mask = eflags_p << 3;
8963 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8964 mask |= unordered_p << 1;
8965 mask |= stack_top_dies;
8967 gcc_assert (mask < 16);
8976 ix86_output_addr_vec_elt (FILE *file, int value)
8978 const char *directive = ASM_LONG;
8982 directive = ASM_QUAD;
8984 gcc_assert (!TARGET_64BIT);
8987 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8991 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8994 fprintf (file, "%s%s%d-%s%d\n",
8995 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8996 else if (HAVE_AS_GOTOFF_IN_DATA)
8997 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8999 else if (TARGET_MACHO)
9001 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9002 machopic_output_function_base_name (file);
9003 fprintf(file, "\n");
9007 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9008 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9011 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9015 ix86_expand_clear (rtx dest)
9019 /* We play register width games, which are only valid after reload. */
9020 gcc_assert (reload_completed);
9022 /* Avoid HImode and its attendant prefix byte. */
9023 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9024 dest = gen_rtx_REG (SImode, REGNO (dest));
9026 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9028 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9029 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9031 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9032 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9038 /* X is an unchanging MEM. If it is a constant pool reference, return
9039 the constant pool rtx, else NULL. */
9042 maybe_get_pool_constant (rtx x)
9044 x = ix86_delegitimize_address (XEXP (x, 0));
9046 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9047 return get_pool_constant (x);
9053 ix86_expand_move (enum machine_mode mode, rtx operands[])
9055 int strict = (reload_in_progress || reload_completed);
9057 enum tls_model model;
9062 if (GET_CODE (op1) == SYMBOL_REF)
9064 model = SYMBOL_REF_TLS_MODEL (op1);
9067 op1 = legitimize_tls_address (op1, model, true);
9068 op1 = force_operand (op1, op0);
9073 else if (GET_CODE (op1) == CONST
9074 && GET_CODE (XEXP (op1, 0)) == PLUS
9075 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9077 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9080 rtx addend = XEXP (XEXP (op1, 0), 1);
9081 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9082 op1 = force_operand (op1, NULL);
9083 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9084 op0, 1, OPTAB_DIRECT);
9090 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9092 if (TARGET_MACHO && !TARGET_64BIT)
9097 rtx temp = ((reload_in_progress
9098 || ((op0 && GET_CODE (op0) == REG)
9100 ? op0 : gen_reg_rtx (Pmode));
9101 op1 = machopic_indirect_data_reference (op1, temp);
9102 op1 = machopic_legitimize_pic_address (op1, mode,
9103 temp == op1 ? 0 : temp);
9105 else if (MACHOPIC_INDIRECT)
9106 op1 = machopic_indirect_data_reference (op1, 0);
9113 if (GET_CODE (op0) == MEM)
9114 op1 = force_reg (Pmode, op1);
9116 op1 = legitimize_address (op1, op1, Pmode);
9121 if (GET_CODE (op0) == MEM
9122 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9123 || !push_operand (op0, mode))
9124 && GET_CODE (op1) == MEM)
9125 op1 = force_reg (mode, op1);
9127 if (push_operand (op0, mode)
9128 && ! general_no_elim_operand (op1, mode))
9129 op1 = copy_to_mode_reg (mode, op1);
9131 /* Force large constants in 64bit compilation into register
9132 to get them CSEed. */
9133 if (TARGET_64BIT && mode == DImode
9134 && immediate_operand (op1, mode)
9135 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9136 && !register_operand (op0, mode)
9137 && optimize && !reload_completed && !reload_in_progress)
9138 op1 = copy_to_mode_reg (mode, op1);
9140 if (FLOAT_MODE_P (mode))
9142 /* If we are loading a floating point constant to a register,
9143 force the value to memory now, since we'll get better code
9144 out the back end. */
9148 else if (GET_CODE (op1) == CONST_DOUBLE)
9150 op1 = validize_mem (force_const_mem (mode, op1));
9151 if (!register_operand (op0, mode))
9153 rtx temp = gen_reg_rtx (mode);
9154 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9155 emit_move_insn (op0, temp);
9162 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9166 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9168 rtx op0 = operands[0], op1 = operands[1];
9170 /* Force constants other than zero into memory. We do not know how
9171 the instructions used to build constants modify the upper 64 bits
9172 of the register, once we have that information we may be able
9173 to handle some of them more efficiently. */
9174 if ((reload_in_progress | reload_completed) == 0
9175 && register_operand (op0, mode)
9177 && standard_sse_constant_p (op1) <= 0)
9178 op1 = validize_mem (force_const_mem (mode, op1));
9180 /* Make operand1 a register if it isn't already. */
9182 && !register_operand (op0, mode)
9183 && !register_operand (op1, mode))
9185 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9189 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9192 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9193 straight to ix86_expand_vector_move. */
9196 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9205 /* If we're optimizing for size, movups is the smallest. */
9208 op0 = gen_lowpart (V4SFmode, op0);
9209 op1 = gen_lowpart (V4SFmode, op1);
9210 emit_insn (gen_sse_movups (op0, op1));
9214 /* ??? If we have typed data, then it would appear that using
9215 movdqu is the only way to get unaligned data loaded with
9217 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9219 op0 = gen_lowpart (V16QImode, op0);
9220 op1 = gen_lowpart (V16QImode, op1);
9221 emit_insn (gen_sse2_movdqu (op0, op1));
9225 if (TARGET_SSE2 && mode == V2DFmode)
9229 /* When SSE registers are split into halves, we can avoid
9230 writing to the top half twice. */
9231 if (TARGET_SSE_SPLIT_REGS)
9233 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9238 /* ??? Not sure about the best option for the Intel chips.
9239 The following would seem to satisfy; the register is
9240 entirely cleared, breaking the dependency chain. We
9241 then store to the upper half, with a dependency depth
9242 of one. A rumor has it that Intel recommends two movsd
9243 followed by an unpacklpd, but this is unconfirmed. And
9244 given that the dependency depth of the unpacklpd would
9245 still be one, I'm not sure why this would be better. */
9246 zero = CONST0_RTX (V2DFmode);
9249 m = adjust_address (op1, DFmode, 0);
9250 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9251 m = adjust_address (op1, DFmode, 8);
9252 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9256 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9257 emit_move_insn (op0, CONST0_RTX (mode));
9259 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9261 if (mode != V4SFmode)
9262 op0 = gen_lowpart (V4SFmode, op0);
9263 m = adjust_address (op1, V2SFmode, 0);
9264 emit_insn (gen_sse_loadlps (op0, op0, m));
9265 m = adjust_address (op1, V2SFmode, 8);
9266 emit_insn (gen_sse_loadhps (op0, op0, m));
9269 else if (MEM_P (op0))
9271 /* If we're optimizing for size, movups is the smallest. */
9274 op0 = gen_lowpart (V4SFmode, op0);
9275 op1 = gen_lowpart (V4SFmode, op1);
9276 emit_insn (gen_sse_movups (op0, op1));
9280 /* ??? Similar to above, only less clear because of quote
9281 typeless stores unquote. */
9282 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9283 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9285 op0 = gen_lowpart (V16QImode, op0);
9286 op1 = gen_lowpart (V16QImode, op1);
9287 emit_insn (gen_sse2_movdqu (op0, op1));
9291 if (TARGET_SSE2 && mode == V2DFmode)
9293 m = adjust_address (op0, DFmode, 0);
9294 emit_insn (gen_sse2_storelpd (m, op1));
9295 m = adjust_address (op0, DFmode, 8);
9296 emit_insn (gen_sse2_storehpd (m, op1));
9300 if (mode != V4SFmode)
9301 op1 = gen_lowpart (V4SFmode, op1);
9302 m = adjust_address (op0, V2SFmode, 0);
9303 emit_insn (gen_sse_storelps (m, op1));
9304 m = adjust_address (op0, V2SFmode, 8);
9305 emit_insn (gen_sse_storehps (m, op1));
9312 /* Expand a push in MODE. This is some mode for which we do not support
9313 proper push instructions, at least from the registers that we expect
9314 the value to live in. */
9317 ix86_expand_push (enum machine_mode mode, rtx x)
9321 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9322 GEN_INT (-GET_MODE_SIZE (mode)),
9323 stack_pointer_rtx, 1, OPTAB_DIRECT);
9324 if (tmp != stack_pointer_rtx)
9325 emit_move_insn (stack_pointer_rtx, tmp);
9327 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9328 emit_move_insn (tmp, x);
9331 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9332 destination to use for the operation. If different from the true
9333 destination in operands[0], a copy operation will be required. */
9336 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9339 int matching_memory;
9340 rtx src1, src2, dst;
9346 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9347 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9348 && (rtx_equal_p (dst, src2)
9349 || immediate_operand (src1, mode)))
9356 /* If the destination is memory, and we do not have matching source
9357 operands, do things in registers. */
9358 matching_memory = 0;
9359 if (GET_CODE (dst) == MEM)
9361 if (rtx_equal_p (dst, src1))
9362 matching_memory = 1;
9363 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9364 && rtx_equal_p (dst, src2))
9365 matching_memory = 2;
9367 dst = gen_reg_rtx (mode);
9370 /* Both source operands cannot be in memory. */
9371 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9373 if (matching_memory != 2)
9374 src2 = force_reg (mode, src2);
9376 src1 = force_reg (mode, src1);
9379 /* If the operation is not commutable, source 1 cannot be a constant
9380 or non-matching memory. */
9381 if ((CONSTANT_P (src1)
9382 || (!matching_memory && GET_CODE (src1) == MEM))
9383 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9384 src1 = force_reg (mode, src1);
9386 src1 = operands[1] = src1;
9387 src2 = operands[2] = src2;
9391 /* Similarly, but assume that the destination has already been
9395 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9396 enum machine_mode mode, rtx operands[])
9398 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9399 gcc_assert (dst == operands[0]);
9402 /* Attempt to expand a binary operator. Make the expansion closer to the
9403 actual machine, then just general_operand, which will allow 3 separate
9404 memory references (one output, two input) in a single insn. */
9407 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9410 rtx src1, src2, dst, op, clob;
9412 dst = ix86_fixup_binary_operands (code, mode, operands);
9416 /* Emit the instruction. */
9418 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9419 if (reload_in_progress)
9421 /* Reload doesn't know about the flags register, and doesn't know that
9422 it doesn't want to clobber it. We can only do this with PLUS. */
9423 gcc_assert (code == PLUS);
9428 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9429 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9432 /* Fix up the destination if needed. */
9433 if (dst != operands[0])
9434 emit_move_insn (operands[0], dst);
9437 /* Return TRUE or FALSE depending on whether the binary operator meets the
9438 appropriate constraints. */
9441 ix86_binary_operator_ok (enum rtx_code code,
9442 enum machine_mode mode ATTRIBUTE_UNUSED,
9445 /* Both source operands cannot be in memory. */
9446 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9448 /* If the operation is not commutable, source 1 cannot be a constant. */
9449 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9451 /* If the destination is memory, we must have a matching source operand. */
9452 if (GET_CODE (operands[0]) == MEM
9453 && ! (rtx_equal_p (operands[0], operands[1])
9454 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9455 && rtx_equal_p (operands[0], operands[2]))))
9457 /* If the operation is not commutable and the source 1 is memory, we must
9458 have a matching destination. */
9459 if (GET_CODE (operands[1]) == MEM
9460 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9461 && ! rtx_equal_p (operands[0], operands[1]))
9466 /* Attempt to expand a unary operator. Make the expansion closer to the
9467 actual machine, then just general_operand, which will allow 2 separate
9468 memory references (one output, one input) in a single insn. */
9471 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9474 int matching_memory;
9475 rtx src, dst, op, clob;
9480 /* If the destination is memory, and we do not have matching source
9481 operands, do things in registers. */
9482 matching_memory = 0;
9485 if (rtx_equal_p (dst, src))
9486 matching_memory = 1;
9488 dst = gen_reg_rtx (mode);
9491 /* When source operand is memory, destination must match. */
9492 if (MEM_P (src) && !matching_memory)
9493 src = force_reg (mode, src);
9495 /* Emit the instruction. */
9497 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9498 if (reload_in_progress || code == NOT)
9500 /* Reload doesn't know about the flags register, and doesn't know that
9501 it doesn't want to clobber it. */
9502 gcc_assert (code == NOT);
9507 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9508 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9511 /* Fix up the destination if needed. */
9512 if (dst != operands[0])
9513 emit_move_insn (operands[0], dst);
9516 /* Return TRUE or FALSE depending on whether the unary operator meets the
9517 appropriate constraints. */
9520 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9521 enum machine_mode mode ATTRIBUTE_UNUSED,
9522 rtx operands[2] ATTRIBUTE_UNUSED)
9524 /* If one of operands is memory, source and destination must match. */
9525 if ((GET_CODE (operands[0]) == MEM
9526 || GET_CODE (operands[1]) == MEM)
9527 && ! rtx_equal_p (operands[0], operands[1]))
9532 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9533 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9534 true, then replicate the mask for all elements of the vector register.
9535 If INVERT is true, then create a mask excluding the sign bit. */
9538 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9540 enum machine_mode vec_mode;
9541 HOST_WIDE_INT hi, lo;
9546 /* Find the sign bit, sign extended to 2*HWI. */
9548 lo = 0x80000000, hi = lo < 0;
9549 else if (HOST_BITS_PER_WIDE_INT >= 64)
9550 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9552 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9557 /* Force this value into the low part of a fp vector constant. */
9558 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9559 mask = gen_lowpart (mode, mask);
9564 v = gen_rtvec (4, mask, mask, mask, mask);
9566 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9567 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9568 vec_mode = V4SFmode;
9573 v = gen_rtvec (2, mask, mask);
9575 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9576 vec_mode = V2DFmode;
9579 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9582 /* Generate code for floating point ABS or NEG. */
9585 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9588 rtx mask, set, use, clob, dst, src;
9589 bool matching_memory;
9590 bool use_sse = false;
9591 bool vector_mode = VECTOR_MODE_P (mode);
9592 enum machine_mode elt_mode = mode;
9596 elt_mode = GET_MODE_INNER (mode);
9599 else if (TARGET_SSE_MATH)
9600 use_sse = SSE_FLOAT_MODE_P (mode);
9602 /* NEG and ABS performed with SSE use bitwise mask operations.
9603 Create the appropriate mask now. */
9605 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9612 /* If the destination is memory, and we don't have matching source
9613 operands or we're using the x87, do things in registers. */
9614 matching_memory = false;
9617 if (use_sse && rtx_equal_p (dst, src))
9618 matching_memory = true;
9620 dst = gen_reg_rtx (mode);
9622 if (MEM_P (src) && !matching_memory)
9623 src = force_reg (mode, src);
9627 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9628 set = gen_rtx_SET (VOIDmode, dst, set);
9633 set = gen_rtx_fmt_e (code, mode, src);
9634 set = gen_rtx_SET (VOIDmode, dst, set);
9637 use = gen_rtx_USE (VOIDmode, mask);
9638 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9639 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9640 gen_rtvec (3, set, use, clob)));
9646 if (dst != operands[0])
9647 emit_move_insn (operands[0], dst);
9650 /* Expand a copysign operation. Special case operand 0 being a constant. */
9653 ix86_expand_copysign (rtx operands[])
9655 enum machine_mode mode, vmode;
9656 rtx dest, op0, op1, mask, nmask;
9662 mode = GET_MODE (dest);
9663 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9665 if (GET_CODE (op0) == CONST_DOUBLE)
9669 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9670 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9672 if (op0 == CONST0_RTX (mode))
9673 op0 = CONST0_RTX (vmode);
9677 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9678 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9680 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9681 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9684 mask = ix86_build_signbit_mask (mode, 0, 0);
9687 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9689 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9693 nmask = ix86_build_signbit_mask (mode, 0, 1);
9694 mask = ix86_build_signbit_mask (mode, 0, 0);
9697 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9699 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9703 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9704 be a constant, and so has already been expanded into a vector constant. */
9707 ix86_split_copysign_const (rtx operands[])
9709 enum machine_mode mode, vmode;
9710 rtx dest, op0, op1, mask, x;
9717 mode = GET_MODE (dest);
9718 vmode = GET_MODE (mask);
9720 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9721 x = gen_rtx_AND (vmode, dest, mask);
9722 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9724 if (op0 != CONST0_RTX (vmode))
9726 x = gen_rtx_IOR (vmode, dest, op0);
9727 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9731 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9732 so we have to do two masks. */
9735 ix86_split_copysign_var (rtx operands[])
9737 enum machine_mode mode, vmode;
9738 rtx dest, scratch, op0, op1, mask, nmask, x;
9741 scratch = operands[1];
9744 nmask = operands[4];
9747 mode = GET_MODE (dest);
9748 vmode = GET_MODE (mask);
9750 if (rtx_equal_p (op0, op1))
9752 /* Shouldn't happen often (it's useless, obviously), but when it does
9753 we'd generate incorrect code if we continue below. */
9754 emit_move_insn (dest, op0);
9758 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9760 gcc_assert (REGNO (op1) == REGNO (scratch));
9762 x = gen_rtx_AND (vmode, scratch, mask);
9763 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9766 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9767 x = gen_rtx_NOT (vmode, dest);
9768 x = gen_rtx_AND (vmode, x, op0);
9769 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9773 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9775 x = gen_rtx_AND (vmode, scratch, mask);
9777 else /* alternative 2,4 */
9779 gcc_assert (REGNO (mask) == REGNO (scratch));
9780 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9781 x = gen_rtx_AND (vmode, scratch, op1);
9783 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9785 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9787 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9788 x = gen_rtx_AND (vmode, dest, nmask);
9790 else /* alternative 3,4 */
9792 gcc_assert (REGNO (nmask) == REGNO (dest));
9794 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9795 x = gen_rtx_AND (vmode, dest, op0);
9797 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9800 x = gen_rtx_IOR (vmode, dest, scratch);
9801 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9804 /* Return TRUE or FALSE depending on whether the first SET in INSN
9805 has source and destination with matching CC modes, and that the
9806 CC mode is at least as constrained as REQ_MODE. */
9809 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9812 enum machine_mode set_mode;
9814 set = PATTERN (insn);
9815 if (GET_CODE (set) == PARALLEL)
9816 set = XVECEXP (set, 0, 0);
9817 gcc_assert (GET_CODE (set) == SET);
9818 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9820 set_mode = GET_MODE (SET_DEST (set));
9824 if (req_mode != CCNOmode
9825 && (req_mode != CCmode
9826 || XEXP (SET_SRC (set), 1) != const0_rtx))
9830 if (req_mode == CCGCmode)
9834 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9838 if (req_mode == CCZmode)
9848 return (GET_MODE (SET_SRC (set)) == set_mode);
9851 /* Generate insn patterns to do an integer compare of OPERANDS. */
9854 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9856 enum machine_mode cmpmode;
9859 cmpmode = SELECT_CC_MODE (code, op0, op1);
9860 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9862 /* This is very simple, but making the interface the same as in the
9863 FP case makes the rest of the code easier. */
9864 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9865 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9867 /* Return the test that should be put into the flags user, i.e.
9868 the bcc, scc, or cmov instruction. */
9869 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9872 /* Figure out whether to use ordered or unordered fp comparisons.
9873 Return the appropriate mode to use. */
9876 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9878 /* ??? In order to make all comparisons reversible, we do all comparisons
9879 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9880 all forms trapping and nontrapping comparisons, we can make inequality
9881 comparisons trapping again, since it results in better code when using
9882 FCOM based compares. */
9883 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9887 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9889 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9890 return ix86_fp_compare_mode (code);
9893 /* Only zero flag is needed. */
9895 case NE: /* ZF!=0 */
9897 /* Codes needing carry flag. */
9898 case GEU: /* CF=0 */
9899 case GTU: /* CF=0 & ZF=0 */
9900 case LTU: /* CF=1 */
9901 case LEU: /* CF=1 | ZF=1 */
9903 /* Codes possibly doable only with sign flag when
9904 comparing against zero. */
9905 case GE: /* SF=OF or SF=0 */
9906 case LT: /* SF<>OF or SF=1 */
9907 if (op1 == const0_rtx)
9910 /* For other cases Carry flag is not required. */
9912 /* Codes doable only with sign flag when comparing
9913 against zero, but we miss jump instruction for it
9914 so we need to use relational tests against overflow
9915 that thus needs to be zero. */
9916 case GT: /* ZF=0 & SF=OF */
9917 case LE: /* ZF=1 | SF<>OF */
9918 if (op1 == const0_rtx)
9922 /* strcmp pattern do (use flags) and combine may ask us for proper
9931 /* Return the fixed registers used for condition codes. */
9934 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9941 /* If two condition code modes are compatible, return a condition code
9942 mode which is compatible with both. Otherwise, return
9945 static enum machine_mode
9946 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9951 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9954 if ((m1 == CCGCmode && m2 == CCGOCmode)
9955 || (m1 == CCGOCmode && m2 == CCGCmode))
9983 /* These are only compatible with themselves, which we already
9989 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9992 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9994 enum rtx_code swapped_code = swap_condition (code);
9995 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9996 || (ix86_fp_comparison_cost (swapped_code)
9997 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10000 /* Swap, force into registers, or otherwise massage the two operands
10001 to a fp comparison. The operands are updated in place; the new
10002 comparison code is returned. */
10004 static enum rtx_code
10005 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10007 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10008 rtx op0 = *pop0, op1 = *pop1;
10009 enum machine_mode op_mode = GET_MODE (op0);
10010 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10012 /* All of the unordered compare instructions only work on registers.
10013 The same is true of the fcomi compare instructions. The XFmode
10014 compare instructions require registers except when comparing
10015 against zero or when converting operand 1 from fixed point to
10019 && (fpcmp_mode == CCFPUmode
10020 || (op_mode == XFmode
10021 && ! (standard_80387_constant_p (op0) == 1
10022 || standard_80387_constant_p (op1) == 1)
10023 && GET_CODE (op1) != FLOAT)
10024 || ix86_use_fcomi_compare (code)))
10026 op0 = force_reg (op_mode, op0);
10027 op1 = force_reg (op_mode, op1);
10031 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10032 things around if they appear profitable, otherwise force op0
10033 into a register. */
10035 if (standard_80387_constant_p (op0) == 0
10036 || (GET_CODE (op0) == MEM
10037 && ! (standard_80387_constant_p (op1) == 0
10038 || GET_CODE (op1) == MEM)))
10041 tmp = op0, op0 = op1, op1 = tmp;
10042 code = swap_condition (code);
10045 if (GET_CODE (op0) != REG)
10046 op0 = force_reg (op_mode, op0);
10048 if (CONSTANT_P (op1))
10050 int tmp = standard_80387_constant_p (op1);
10052 op1 = validize_mem (force_const_mem (op_mode, op1));
10056 op1 = force_reg (op_mode, op1);
10059 op1 = force_reg (op_mode, op1);
10063 /* Try to rearrange the comparison to make it cheaper. */
10064 if (ix86_fp_comparison_cost (code)
10065 > ix86_fp_comparison_cost (swap_condition (code))
10066 && (GET_CODE (op1) == REG || !no_new_pseudos))
10069 tmp = op0, op0 = op1, op1 = tmp;
10070 code = swap_condition (code);
10071 if (GET_CODE (op0) != REG)
10072 op0 = force_reg (op_mode, op0);
10080 /* Convert comparison codes we use to represent FP comparison to integer
10081 code that will result in proper branch. Return UNKNOWN if no such code
10085 ix86_fp_compare_code_to_integer (enum rtx_code code)
10114 /* Split comparison code CODE into comparisons we can do using branch
10115 instructions. BYPASS_CODE is comparison code for branch that will
10116 branch around FIRST_CODE and SECOND_CODE. If some of branches
10117 is not required, set value to UNKNOWN.
10118 We never require more than two branches. */
10121 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10122 enum rtx_code *first_code,
10123 enum rtx_code *second_code)
10125 *first_code = code;
10126 *bypass_code = UNKNOWN;
10127 *second_code = UNKNOWN;
10129 /* The fcomi comparison sets flags as follows:
10139 case GT: /* GTU - CF=0 & ZF=0 */
10140 case GE: /* GEU - CF=0 */
10141 case ORDERED: /* PF=0 */
10142 case UNORDERED: /* PF=1 */
10143 case UNEQ: /* EQ - ZF=1 */
10144 case UNLT: /* LTU - CF=1 */
10145 case UNLE: /* LEU - CF=1 | ZF=1 */
10146 case LTGT: /* EQ - ZF=0 */
10148 case LT: /* LTU - CF=1 - fails on unordered */
10149 *first_code = UNLT;
10150 *bypass_code = UNORDERED;
10152 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10153 *first_code = UNLE;
10154 *bypass_code = UNORDERED;
10156 case EQ: /* EQ - ZF=1 - fails on unordered */
10157 *first_code = UNEQ;
10158 *bypass_code = UNORDERED;
10160 case NE: /* NE - ZF=0 - fails on unordered */
10161 *first_code = LTGT;
10162 *second_code = UNORDERED;
10164 case UNGE: /* GEU - CF=0 - fails on unordered */
10166 *second_code = UNORDERED;
10168 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10170 *second_code = UNORDERED;
10173 gcc_unreachable ();
10175 if (!TARGET_IEEE_FP)
10177 *second_code = UNKNOWN;
10178 *bypass_code = UNKNOWN;
10182 /* Return cost of comparison done fcom + arithmetics operations on AX.
10183 All following functions do use number of instructions as a cost metrics.
10184 In future this should be tweaked to compute bytes for optimize_size and
10185 take into account performance of various instructions on various CPUs. */
10187 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10189 if (!TARGET_IEEE_FP)
10191 /* The cost of code output by ix86_expand_fp_compare. */
10215 gcc_unreachable ();
10219 /* Return cost of comparison done using fcomi operation.
10220 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10222 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10224 enum rtx_code bypass_code, first_code, second_code;
10225 /* Return arbitrarily high cost when instruction is not supported - this
10226 prevents gcc from using it. */
10229 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10230 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10233 /* Return cost of comparison done using sahf operation.
10234 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10236 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10238 enum rtx_code bypass_code, first_code, second_code;
10239 /* Return arbitrarily high cost when instruction is not preferred - this
10240 avoids gcc from using it. */
10241 if (!TARGET_USE_SAHF && !optimize_size)
10243 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10244 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10247 /* Compute cost of the comparison done using any method.
10248 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10250 ix86_fp_comparison_cost (enum rtx_code code)
10252 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10255 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10256 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10258 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10259 if (min > sahf_cost)
10261 if (min > fcomi_cost)
10266 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10269 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10270 rtx *second_test, rtx *bypass_test)
10272 enum machine_mode fpcmp_mode, intcmp_mode;
10274 int cost = ix86_fp_comparison_cost (code);
10275 enum rtx_code bypass_code, first_code, second_code;
10277 fpcmp_mode = ix86_fp_compare_mode (code);
10278 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10281 *second_test = NULL_RTX;
10283 *bypass_test = NULL_RTX;
10285 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10287 /* Do fcomi/sahf based test when profitable. */
10288 if ((bypass_code == UNKNOWN || bypass_test)
10289 && (second_code == UNKNOWN || second_test)
10290 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10294 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10295 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10301 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10302 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10304 scratch = gen_reg_rtx (HImode);
10305 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10306 emit_insn (gen_x86_sahf_1 (scratch));
10309 /* The FP codes work out to act like unsigned. */
10310 intcmp_mode = fpcmp_mode;
10312 if (bypass_code != UNKNOWN)
10313 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10314 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10316 if (second_code != UNKNOWN)
10317 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10318 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10323 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10324 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10325 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10327 scratch = gen_reg_rtx (HImode);
10328 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10330 /* In the unordered case, we have to check C2 for NaN's, which
10331 doesn't happen to work out to anything nice combination-wise.
10332 So do some bit twiddling on the value we've got in AH to come
10333 up with an appropriate set of condition codes. */
10335 intcmp_mode = CCNOmode;
10340 if (code == GT || !TARGET_IEEE_FP)
10342 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10347 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10348 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10349 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10350 intcmp_mode = CCmode;
10356 if (code == LT && TARGET_IEEE_FP)
10358 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10359 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10360 intcmp_mode = CCmode;
10365 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10371 if (code == GE || !TARGET_IEEE_FP)
10373 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10378 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10379 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10386 if (code == LE && TARGET_IEEE_FP)
10388 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10389 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10390 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10391 intcmp_mode = CCmode;
10396 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10402 if (code == EQ && TARGET_IEEE_FP)
10404 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10405 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10406 intcmp_mode = CCmode;
10411 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10418 if (code == NE && TARGET_IEEE_FP)
10420 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10421 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10427 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10433 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10437 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10442 gcc_unreachable ();
10446 /* Return the test that should be put into the flags user, i.e.
10447 the bcc, scc, or cmov instruction. */
10448 return gen_rtx_fmt_ee (code, VOIDmode,
10449 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10454 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10457 op0 = ix86_compare_op0;
10458 op1 = ix86_compare_op1;
10461 *second_test = NULL_RTX;
10463 *bypass_test = NULL_RTX;
10465 if (ix86_compare_emitted)
10467 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10468 ix86_compare_emitted = NULL_RTX;
10470 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10471 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10472 second_test, bypass_test);
10474 ret = ix86_expand_int_compare (code, op0, op1);
10479 /* Return true if the CODE will result in nontrivial jump sequence. */
10481 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10483 enum rtx_code bypass_code, first_code, second_code;
10486 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10487 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10491 ix86_expand_branch (enum rtx_code code, rtx label)
10495 /* If we have emitted a compare insn, go straight to simple.
10496 ix86_expand_compare won't emit anything if ix86_compare_emitted
10498 if (ix86_compare_emitted)
10501 switch (GET_MODE (ix86_compare_op0))
10507 tmp = ix86_expand_compare (code, NULL, NULL);
10508 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10509 gen_rtx_LABEL_REF (VOIDmode, label),
10511 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10520 enum rtx_code bypass_code, first_code, second_code;
10522 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10523 &ix86_compare_op1);
10525 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10527 /* Check whether we will use the natural sequence with one jump. If
10528 so, we can expand jump early. Otherwise delay expansion by
10529 creating compound insn to not confuse optimizers. */
10530 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10533 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10534 gen_rtx_LABEL_REF (VOIDmode, label),
10535 pc_rtx, NULL_RTX, NULL_RTX);
10539 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10540 ix86_compare_op0, ix86_compare_op1);
10541 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10542 gen_rtx_LABEL_REF (VOIDmode, label),
10544 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10546 use_fcomi = ix86_use_fcomi_compare (code);
10547 vec = rtvec_alloc (3 + !use_fcomi);
10548 RTVEC_ELT (vec, 0) = tmp;
10550 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10552 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10555 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10557 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10566 /* Expand DImode branch into multiple compare+branch. */
10568 rtx lo[2], hi[2], label2;
10569 enum rtx_code code1, code2, code3;
10570 enum machine_mode submode;
10572 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10574 tmp = ix86_compare_op0;
10575 ix86_compare_op0 = ix86_compare_op1;
10576 ix86_compare_op1 = tmp;
10577 code = swap_condition (code);
10579 if (GET_MODE (ix86_compare_op0) == DImode)
10581 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10582 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10587 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10588 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10592 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10593 avoid two branches. This costs one extra insn, so disable when
10594 optimizing for size. */
10596 if ((code == EQ || code == NE)
10598 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10603 if (hi[1] != const0_rtx)
10604 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10605 NULL_RTX, 0, OPTAB_WIDEN);
10608 if (lo[1] != const0_rtx)
10609 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10610 NULL_RTX, 0, OPTAB_WIDEN);
10612 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10613 NULL_RTX, 0, OPTAB_WIDEN);
10615 ix86_compare_op0 = tmp;
10616 ix86_compare_op1 = const0_rtx;
10617 ix86_expand_branch (code, label);
10621 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10622 op1 is a constant and the low word is zero, then we can just
10623 examine the high word. */
10625 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10628 case LT: case LTU: case GE: case GEU:
10629 ix86_compare_op0 = hi[0];
10630 ix86_compare_op1 = hi[1];
10631 ix86_expand_branch (code, label);
10637 /* Otherwise, we need two or three jumps. */
10639 label2 = gen_label_rtx ();
10642 code2 = swap_condition (code);
10643 code3 = unsigned_condition (code);
10647 case LT: case GT: case LTU: case GTU:
10650 case LE: code1 = LT; code2 = GT; break;
10651 case GE: code1 = GT; code2 = LT; break;
10652 case LEU: code1 = LTU; code2 = GTU; break;
10653 case GEU: code1 = GTU; code2 = LTU; break;
10655 case EQ: code1 = UNKNOWN; code2 = NE; break;
10656 case NE: code2 = UNKNOWN; break;
10659 gcc_unreachable ();
10664 * if (hi(a) < hi(b)) goto true;
10665 * if (hi(a) > hi(b)) goto false;
10666 * if (lo(a) < lo(b)) goto true;
10670 ix86_compare_op0 = hi[0];
10671 ix86_compare_op1 = hi[1];
10673 if (code1 != UNKNOWN)
10674 ix86_expand_branch (code1, label);
10675 if (code2 != UNKNOWN)
10676 ix86_expand_branch (code2, label2);
10678 ix86_compare_op0 = lo[0];
10679 ix86_compare_op1 = lo[1];
10680 ix86_expand_branch (code3, label);
10682 if (code2 != UNKNOWN)
10683 emit_label (label2);
10688 gcc_unreachable ();
10692 /* Split branch based on floating point condition. */
10694 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10695 rtx target1, rtx target2, rtx tmp, rtx pushed)
10697 rtx second, bypass;
10698 rtx label = NULL_RTX;
10700 int bypass_probability = -1, second_probability = -1, probability = -1;
10703 if (target2 != pc_rtx)
10706 code = reverse_condition_maybe_unordered (code);
10711 condition = ix86_expand_fp_compare (code, op1, op2,
10712 tmp, &second, &bypass);
10714 /* Remove pushed operand from stack. */
10716 ix86_free_from_memory (GET_MODE (pushed));
10718 if (split_branch_probability >= 0)
10720 /* Distribute the probabilities across the jumps.
10721 Assume the BYPASS and SECOND to be always test
10723 probability = split_branch_probability;
10725 /* Value of 1 is low enough to make no need for probability
10726 to be updated. Later we may run some experiments and see
10727 if unordered values are more frequent in practice. */
10729 bypass_probability = 1;
10731 second_probability = 1;
10733 if (bypass != NULL_RTX)
10735 label = gen_label_rtx ();
10736 i = emit_jump_insn (gen_rtx_SET
10738 gen_rtx_IF_THEN_ELSE (VOIDmode,
10740 gen_rtx_LABEL_REF (VOIDmode,
10743 if (bypass_probability >= 0)
10745 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10746 GEN_INT (bypass_probability),
10749 i = emit_jump_insn (gen_rtx_SET
10751 gen_rtx_IF_THEN_ELSE (VOIDmode,
10752 condition, target1, target2)));
10753 if (probability >= 0)
10755 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10756 GEN_INT (probability),
10758 if (second != NULL_RTX)
10760 i = emit_jump_insn (gen_rtx_SET
10762 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10764 if (second_probability >= 0)
10766 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10767 GEN_INT (second_probability),
10770 if (label != NULL_RTX)
10771 emit_label (label);
10775 ix86_expand_setcc (enum rtx_code code, rtx dest)
10777 rtx ret, tmp, tmpreg, equiv;
10778 rtx second_test, bypass_test;
10780 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10781 return 0; /* FAIL */
10783 gcc_assert (GET_MODE (dest) == QImode);
10785 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10786 PUT_MODE (ret, QImode);
10791 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10792 if (bypass_test || second_test)
10794 rtx test = second_test;
10796 rtx tmp2 = gen_reg_rtx (QImode);
10799 gcc_assert (!second_test);
10800 test = bypass_test;
10802 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10804 PUT_MODE (test, QImode);
10805 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10808 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10810 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10813 /* Attach a REG_EQUAL note describing the comparison result. */
10814 if (ix86_compare_op0 && ix86_compare_op1)
10816 equiv = simplify_gen_relational (code, QImode,
10817 GET_MODE (ix86_compare_op0),
10818 ix86_compare_op0, ix86_compare_op1);
10819 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10822 return 1; /* DONE */
10825 /* Expand comparison setting or clearing carry flag. Return true when
10826 successful and set pop for the operation. */
10828 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10830 enum machine_mode mode =
10831 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10833 /* Do not handle DImode compares that go through special path. Also we can't
10834 deal with FP compares yet. This is possible to add. */
10835 if (mode == (TARGET_64BIT ? TImode : DImode))
10837 if (FLOAT_MODE_P (mode))
10839 rtx second_test = NULL, bypass_test = NULL;
10840 rtx compare_op, compare_seq;
10842 /* Shortcut: following common codes never translate into carry flag compares. */
10843 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10844 || code == ORDERED || code == UNORDERED)
10847 /* These comparisons require zero flag; swap operands so they won't. */
10848 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10849 && !TARGET_IEEE_FP)
10854 code = swap_condition (code);
10857 /* Try to expand the comparison and verify that we end up with carry flag
10858 based comparison. This is fails to be true only when we decide to expand
10859 comparison using arithmetic that is not too common scenario. */
10861 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10862 &second_test, &bypass_test);
10863 compare_seq = get_insns ();
10866 if (second_test || bypass_test)
10868 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10869 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10870 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10872 code = GET_CODE (compare_op);
10873 if (code != LTU && code != GEU)
10875 emit_insn (compare_seq);
10879 if (!INTEGRAL_MODE_P (mode))
10887 /* Convert a==0 into (unsigned)a<1. */
10890 if (op1 != const0_rtx)
10893 code = (code == EQ ? LTU : GEU);
10896 /* Convert a>b into b<a or a>=b-1. */
10899 if (GET_CODE (op1) == CONST_INT)
10901 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10902 /* Bail out on overflow. We still can swap operands but that
10903 would force loading of the constant into register. */
10904 if (op1 == const0_rtx
10905 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10907 code = (code == GTU ? GEU : LTU);
10914 code = (code == GTU ? LTU : GEU);
10918 /* Convert a>=0 into (unsigned)a<0x80000000. */
10921 if (mode == DImode || op1 != const0_rtx)
10923 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10924 code = (code == LT ? GEU : LTU);
10928 if (mode == DImode || op1 != constm1_rtx)
10930 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10931 code = (code == LE ? GEU : LTU);
10937 /* Swapping operands may cause constant to appear as first operand. */
10938 if (!nonimmediate_operand (op0, VOIDmode))
10940 if (no_new_pseudos)
10942 op0 = force_reg (mode, op0);
10944 ix86_compare_op0 = op0;
10945 ix86_compare_op1 = op1;
10946 *pop = ix86_expand_compare (code, NULL, NULL);
10947 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10952 ix86_expand_int_movcc (rtx operands[])
10954 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10955 rtx compare_seq, compare_op;
10956 rtx second_test, bypass_test;
10957 enum machine_mode mode = GET_MODE (operands[0]);
10958 bool sign_bit_compare_p = false;;
10961 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10962 compare_seq = get_insns ();
10965 compare_code = GET_CODE (compare_op);
10967 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10968 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10969 sign_bit_compare_p = true;
10971 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10972 HImode insns, we'd be swallowed in word prefix ops. */
10974 if ((mode != HImode || TARGET_FAST_PREFIX)
10975 && (mode != (TARGET_64BIT ? TImode : DImode))
10976 && GET_CODE (operands[2]) == CONST_INT
10977 && GET_CODE (operands[3]) == CONST_INT)
10979 rtx out = operands[0];
10980 HOST_WIDE_INT ct = INTVAL (operands[2]);
10981 HOST_WIDE_INT cf = INTVAL (operands[3]);
10982 HOST_WIDE_INT diff;
10985 /* Sign bit compares are better done using shifts than we do by using
10987 if (sign_bit_compare_p
10988 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10989 ix86_compare_op1, &compare_op))
10991 /* Detect overlap between destination and compare sources. */
10994 if (!sign_bit_compare_p)
10996 bool fpcmp = false;
10998 compare_code = GET_CODE (compare_op);
11000 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11001 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11004 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11007 /* To simplify rest of code, restrict to the GEU case. */
11008 if (compare_code == LTU)
11010 HOST_WIDE_INT tmp = ct;
11013 compare_code = reverse_condition (compare_code);
11014 code = reverse_condition (code);
11019 PUT_CODE (compare_op,
11020 reverse_condition_maybe_unordered
11021 (GET_CODE (compare_op)));
11023 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11027 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11028 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11029 tmp = gen_reg_rtx (mode);
11031 if (mode == DImode)
11032 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11034 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11038 if (code == GT || code == GE)
11039 code = reverse_condition (code);
11042 HOST_WIDE_INT tmp = ct;
11047 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11048 ix86_compare_op1, VOIDmode, 0, -1);
11061 tmp = expand_simple_binop (mode, PLUS,
11063 copy_rtx (tmp), 1, OPTAB_DIRECT);
11074 tmp = expand_simple_binop (mode, IOR,
11076 copy_rtx (tmp), 1, OPTAB_DIRECT);
11078 else if (diff == -1 && ct)
11088 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11090 tmp = expand_simple_binop (mode, PLUS,
11091 copy_rtx (tmp), GEN_INT (cf),
11092 copy_rtx (tmp), 1, OPTAB_DIRECT);
11100 * andl cf - ct, dest
11110 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11113 tmp = expand_simple_binop (mode, AND,
11115 gen_int_mode (cf - ct, mode),
11116 copy_rtx (tmp), 1, OPTAB_DIRECT);
11118 tmp = expand_simple_binop (mode, PLUS,
11119 copy_rtx (tmp), GEN_INT (ct),
11120 copy_rtx (tmp), 1, OPTAB_DIRECT);
11123 if (!rtx_equal_p (tmp, out))
11124 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11126 return 1; /* DONE */
11132 tmp = ct, ct = cf, cf = tmp;
11134 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11136 /* We may be reversing unordered compare to normal compare, that
11137 is not valid in general (we may convert non-trapping condition
11138 to trapping one), however on i386 we currently emit all
11139 comparisons unordered. */
11140 compare_code = reverse_condition_maybe_unordered (compare_code);
11141 code = reverse_condition_maybe_unordered (code);
11145 compare_code = reverse_condition (compare_code);
11146 code = reverse_condition (code);
11150 compare_code = UNKNOWN;
11151 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11152 && GET_CODE (ix86_compare_op1) == CONST_INT)
11154 if (ix86_compare_op1 == const0_rtx
11155 && (code == LT || code == GE))
11156 compare_code = code;
11157 else if (ix86_compare_op1 == constm1_rtx)
11161 else if (code == GT)
11166 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11167 if (compare_code != UNKNOWN
11168 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11169 && (cf == -1 || ct == -1))
11171 /* If lea code below could be used, only optimize
11172 if it results in a 2 insn sequence. */
11174 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11175 || diff == 3 || diff == 5 || diff == 9)
11176 || (compare_code == LT && ct == -1)
11177 || (compare_code == GE && cf == -1))
11180 * notl op1 (if necessary)
11188 code = reverse_condition (code);
11191 out = emit_store_flag (out, code, ix86_compare_op0,
11192 ix86_compare_op1, VOIDmode, 0, -1);
11194 out = expand_simple_binop (mode, IOR,
11196 out, 1, OPTAB_DIRECT);
11197 if (out != operands[0])
11198 emit_move_insn (operands[0], out);
11200 return 1; /* DONE */
11205 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11206 || diff == 3 || diff == 5 || diff == 9)
11207 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11209 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11215 * lea cf(dest*(ct-cf)),dest
11219 * This also catches the degenerate setcc-only case.
11225 out = emit_store_flag (out, code, ix86_compare_op0,
11226 ix86_compare_op1, VOIDmode, 0, 1);
11229 /* On x86_64 the lea instruction operates on Pmode, so we need
11230 to get arithmetics done in proper mode to match. */
11232 tmp = copy_rtx (out);
11236 out1 = copy_rtx (out);
11237 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11241 tmp = gen_rtx_PLUS (mode, tmp, out1);
11247 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11250 if (!rtx_equal_p (tmp, out))
11253 out = force_operand (tmp, copy_rtx (out));
11255 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11257 if (!rtx_equal_p (out, operands[0]))
11258 emit_move_insn (operands[0], copy_rtx (out));
11260 return 1; /* DONE */
11264 * General case: Jumpful:
11265 * xorl dest,dest cmpl op1, op2
11266 * cmpl op1, op2 movl ct, dest
11267 * setcc dest jcc 1f
11268 * decl dest movl cf, dest
11269 * andl (cf-ct),dest 1:
11272 * Size 20. Size 14.
11274 * This is reasonably steep, but branch mispredict costs are
11275 * high on modern cpus, so consider failing only if optimizing
11279 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11280 && BRANCH_COST >= 2)
11286 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11287 /* We may be reversing unordered compare to normal compare,
11288 that is not valid in general (we may convert non-trapping
11289 condition to trapping one), however on i386 we currently
11290 emit all comparisons unordered. */
11291 code = reverse_condition_maybe_unordered (code);
11294 code = reverse_condition (code);
11295 if (compare_code != UNKNOWN)
11296 compare_code = reverse_condition (compare_code);
11300 if (compare_code != UNKNOWN)
11302 /* notl op1 (if needed)
11307 For x < 0 (resp. x <= -1) there will be no notl,
11308 so if possible swap the constants to get rid of the
11310 True/false will be -1/0 while code below (store flag
11311 followed by decrement) is 0/-1, so the constants need
11312 to be exchanged once more. */
11314 if (compare_code == GE || !cf)
11316 code = reverse_condition (code);
11321 HOST_WIDE_INT tmp = cf;
11326 out = emit_store_flag (out, code, ix86_compare_op0,
11327 ix86_compare_op1, VOIDmode, 0, -1);
11331 out = emit_store_flag (out, code, ix86_compare_op0,
11332 ix86_compare_op1, VOIDmode, 0, 1);
11334 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11335 copy_rtx (out), 1, OPTAB_DIRECT);
11338 out = expand_simple_binop (mode, AND, copy_rtx (out),
11339 gen_int_mode (cf - ct, mode),
11340 copy_rtx (out), 1, OPTAB_DIRECT);
11342 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11343 copy_rtx (out), 1, OPTAB_DIRECT);
11344 if (!rtx_equal_p (out, operands[0]))
11345 emit_move_insn (operands[0], copy_rtx (out));
11347 return 1; /* DONE */
11351 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11353 /* Try a few things more with specific constants and a variable. */
11356 rtx var, orig_out, out, tmp;
11358 if (BRANCH_COST <= 2)
11359 return 0; /* FAIL */
11361 /* If one of the two operands is an interesting constant, load a
11362 constant with the above and mask it in with a logical operation. */
11364 if (GET_CODE (operands[2]) == CONST_INT)
11367 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11368 operands[3] = constm1_rtx, op = and_optab;
11369 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11370 operands[3] = const0_rtx, op = ior_optab;
11372 return 0; /* FAIL */
11374 else if (GET_CODE (operands[3]) == CONST_INT)
11377 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11378 operands[2] = constm1_rtx, op = and_optab;
11379 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11380 operands[2] = const0_rtx, op = ior_optab;
11382 return 0; /* FAIL */
11385 return 0; /* FAIL */
11387 orig_out = operands[0];
11388 tmp = gen_reg_rtx (mode);
11391 /* Recurse to get the constant loaded. */
11392 if (ix86_expand_int_movcc (operands) == 0)
11393 return 0; /* FAIL */
11395 /* Mask in the interesting variable. */
11396 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11398 if (!rtx_equal_p (out, orig_out))
11399 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11401 return 1; /* DONE */
11405 * For comparison with above,
11415 if (! nonimmediate_operand (operands[2], mode))
11416 operands[2] = force_reg (mode, operands[2]);
11417 if (! nonimmediate_operand (operands[3], mode))
11418 operands[3] = force_reg (mode, operands[3]);
11420 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11422 rtx tmp = gen_reg_rtx (mode);
11423 emit_move_insn (tmp, operands[3]);
11426 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11428 rtx tmp = gen_reg_rtx (mode);
11429 emit_move_insn (tmp, operands[2]);
11433 if (! register_operand (operands[2], VOIDmode)
11435 || ! register_operand (operands[3], VOIDmode)))
11436 operands[2] = force_reg (mode, operands[2]);
11439 && ! register_operand (operands[3], VOIDmode))
11440 operands[3] = force_reg (mode, operands[3]);
11442 emit_insn (compare_seq);
11443 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11444 gen_rtx_IF_THEN_ELSE (mode,
11445 compare_op, operands[2],
11448 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11449 gen_rtx_IF_THEN_ELSE (mode,
11451 copy_rtx (operands[3]),
11452 copy_rtx (operands[0]))));
11454 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11455 gen_rtx_IF_THEN_ELSE (mode,
11457 copy_rtx (operands[2]),
11458 copy_rtx (operands[0]))));
11460 return 1; /* DONE */
11463 /* Swap, force into registers, or otherwise massage the two operands
11464 to an sse comparison with a mask result. Thus we differ a bit from
11465 ix86_prepare_fp_compare_args which expects to produce a flags result.
11467 The DEST operand exists to help determine whether to commute commutative
11468 operators. The POP0/POP1 operands are updated in place. The new
11469 comparison code is returned, or UNKNOWN if not implementable. */
11471 static enum rtx_code
11472 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11473 rtx *pop0, rtx *pop1)
11481 /* We have no LTGT as an operator. We could implement it with
11482 NE & ORDERED, but this requires an extra temporary. It's
11483 not clear that it's worth it. */
11490 /* These are supported directly. */
11497 /* For commutative operators, try to canonicalize the destination
11498 operand to be first in the comparison - this helps reload to
11499 avoid extra moves. */
11500 if (!dest || !rtx_equal_p (dest, *pop1))
11508 /* These are not supported directly. Swap the comparison operands
11509 to transform into something that is supported. */
11513 code = swap_condition (code);
11517 gcc_unreachable ();
11523 /* Detect conditional moves that exactly match min/max operational
11524 semantics. Note that this is IEEE safe, as long as we don't
11525 interchange the operands.
11527 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11528 and TRUE if the operation is successful and instructions are emitted. */
11531 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11532 rtx cmp_op1, rtx if_true, rtx if_false)
11534 enum machine_mode mode;
11540 else if (code == UNGE)
11543 if_true = if_false;
11549 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11551 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11556 mode = GET_MODE (dest);
11558 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11559 but MODE may be a vector mode and thus not appropriate. */
11560 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11562 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11565 if_true = force_reg (mode, if_true);
11566 v = gen_rtvec (2, if_true, if_false);
11567 tmp = gen_rtx_UNSPEC (mode, v, u);
11571 code = is_min ? SMIN : SMAX;
11572 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11575 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11579 /* Expand an sse vector comparison. Return the register with the result. */
11582 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11583 rtx op_true, rtx op_false)
11585 enum machine_mode mode = GET_MODE (dest);
11588 cmp_op0 = force_reg (mode, cmp_op0);
11589 if (!nonimmediate_operand (cmp_op1, mode))
11590 cmp_op1 = force_reg (mode, cmp_op1);
11593 || reg_overlap_mentioned_p (dest, op_true)
11594 || reg_overlap_mentioned_p (dest, op_false))
11595 dest = gen_reg_rtx (mode);
11597 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11598 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11603 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11604 operations. This is used for both scalar and vector conditional moves. */
11607 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11609 enum machine_mode mode = GET_MODE (dest);
11612 if (op_false == CONST0_RTX (mode))
11614 op_true = force_reg (mode, op_true);
11615 x = gen_rtx_AND (mode, cmp, op_true);
11616 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11618 else if (op_true == CONST0_RTX (mode))
11620 op_false = force_reg (mode, op_false);
11621 x = gen_rtx_NOT (mode, cmp);
11622 x = gen_rtx_AND (mode, x, op_false);
11623 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11627 op_true = force_reg (mode, op_true);
11628 op_false = force_reg (mode, op_false);
11630 t2 = gen_reg_rtx (mode);
11632 t3 = gen_reg_rtx (mode);
11636 x = gen_rtx_AND (mode, op_true, cmp);
11637 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11639 x = gen_rtx_NOT (mode, cmp);
11640 x = gen_rtx_AND (mode, x, op_false);
11641 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11643 x = gen_rtx_IOR (mode, t3, t2);
11644 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11648 /* Expand a floating-point conditional move. Return true if successful. */
11651 ix86_expand_fp_movcc (rtx operands[])
11653 enum machine_mode mode = GET_MODE (operands[0]);
11654 enum rtx_code code = GET_CODE (operands[1]);
11655 rtx tmp, compare_op, second_test, bypass_test;
11657 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11659 enum machine_mode cmode;
11661 /* Since we've no cmove for sse registers, don't force bad register
11662 allocation just to gain access to it. Deny movcc when the
11663 comparison mode doesn't match the move mode. */
11664 cmode = GET_MODE (ix86_compare_op0);
11665 if (cmode == VOIDmode)
11666 cmode = GET_MODE (ix86_compare_op1);
11670 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11672 &ix86_compare_op1);
11673 if (code == UNKNOWN)
11676 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11677 ix86_compare_op1, operands[2],
11681 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11682 ix86_compare_op1, operands[2], operands[3]);
11683 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11687 /* The floating point conditional move instructions don't directly
11688 support conditions resulting from a signed integer comparison. */
11690 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11692 /* The floating point conditional move instructions don't directly
11693 support signed integer comparisons. */
11695 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11697 gcc_assert (!second_test && !bypass_test);
11698 tmp = gen_reg_rtx (QImode);
11699 ix86_expand_setcc (code, tmp);
11701 ix86_compare_op0 = tmp;
11702 ix86_compare_op1 = const0_rtx;
11703 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11705 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11707 tmp = gen_reg_rtx (mode);
11708 emit_move_insn (tmp, operands[3]);
11711 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11713 tmp = gen_reg_rtx (mode);
11714 emit_move_insn (tmp, operands[2]);
11718 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11719 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11720 operands[2], operands[3])));
11722 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11723 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11724 operands[3], operands[0])));
11726 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11727 gen_rtx_IF_THEN_ELSE (mode, second_test,
11728 operands[2], operands[0])));
11733 /* Expand a floating-point vector conditional move; a vcond operation
11734 rather than a movcc operation. */
11737 ix86_expand_fp_vcond (rtx operands[])
11739 enum rtx_code code = GET_CODE (operands[3]);
11742 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11743 &operands[4], &operands[5]);
11744 if (code == UNKNOWN)
11747 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11748 operands[5], operands[1], operands[2]))
11751 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11752 operands[1], operands[2]);
11753 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11757 /* Expand a signed integral vector conditional move. */
11760 ix86_expand_int_vcond (rtx operands[])
11762 enum machine_mode mode = GET_MODE (operands[0]);
11763 enum rtx_code code = GET_CODE (operands[3]);
11764 bool negate = false;
11767 cop0 = operands[4];
11768 cop1 = operands[5];
11770 /* Canonicalize the comparison to EQ, GT, GTU. */
11781 code = reverse_condition (code);
11787 code = reverse_condition (code);
11793 code = swap_condition (code);
11794 x = cop0, cop0 = cop1, cop1 = x;
11798 gcc_unreachable ();
11801 /* Unsigned parallel compare is not supported by the hardware. Play some
11802 tricks to turn this into a signed comparison against 0. */
11805 cop0 = force_reg (mode, cop0);
11813 /* Perform a parallel modulo subtraction. */
11814 t1 = gen_reg_rtx (mode);
11815 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11817 /* Extract the original sign bit of op0. */
11818 mask = GEN_INT (-0x80000000);
11819 mask = gen_rtx_CONST_VECTOR (mode,
11820 gen_rtvec (4, mask, mask, mask, mask));
11821 mask = force_reg (mode, mask);
11822 t2 = gen_reg_rtx (mode);
11823 emit_insn (gen_andv4si3 (t2, cop0, mask));
11825 /* XOR it back into the result of the subtraction. This results
11826 in the sign bit set iff we saw unsigned underflow. */
11827 x = gen_reg_rtx (mode);
11828 emit_insn (gen_xorv4si3 (x, t1, t2));
11836 /* Perform a parallel unsigned saturating subtraction. */
11837 x = gen_reg_rtx (mode);
11838 emit_insn (gen_rtx_SET (VOIDmode, x,
11839 gen_rtx_US_MINUS (mode, cop0, cop1)));
11846 gcc_unreachable ();
11850 cop1 = CONST0_RTX (mode);
11853 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11854 operands[1+negate], operands[2-negate]);
11856 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11857 operands[2-negate]);
11861 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11862 true if we should do zero extension, else sign extension. HIGH_P is
11863 true if we want the N/2 high elements, else the low elements. */
11866 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
11868 enum machine_mode imode = GET_MODE (operands[1]);
11869 rtx (*unpack)(rtx, rtx, rtx);
11876 unpack = gen_vec_interleave_highv16qi;
11878 unpack = gen_vec_interleave_lowv16qi;
11882 unpack = gen_vec_interleave_highv8hi;
11884 unpack = gen_vec_interleave_lowv8hi;
11888 unpack = gen_vec_interleave_highv4si;
11890 unpack = gen_vec_interleave_lowv4si;
11893 gcc_unreachable ();
11896 dest = gen_lowpart (imode, operands[0]);
11899 se = force_reg (imode, CONST0_RTX (imode));
11901 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
11902 operands[1], pc_rtx, pc_rtx);
11904 emit_insn (unpack (dest, operands[1], se));
11907 /* Expand conditional increment or decrement using adb/sbb instructions.
11908 The default case using setcc followed by the conditional move can be
11909 done by generic code. */
11911 ix86_expand_int_addcc (rtx operands[])
11913 enum rtx_code code = GET_CODE (operands[1]);
11915 rtx val = const0_rtx;
11916 bool fpcmp = false;
11917 enum machine_mode mode = GET_MODE (operands[0]);
11919 if (operands[3] != const1_rtx
11920 && operands[3] != constm1_rtx)
11922 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11923 ix86_compare_op1, &compare_op))
11925 code = GET_CODE (compare_op);
11927 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11928 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11931 code = ix86_fp_compare_code_to_integer (code);
11938 PUT_CODE (compare_op,
11939 reverse_condition_maybe_unordered
11940 (GET_CODE (compare_op)));
11942 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11944 PUT_MODE (compare_op, mode);
11946 /* Construct either adc or sbb insn. */
11947 if ((code == LTU) == (operands[3] == constm1_rtx))
11949 switch (GET_MODE (operands[0]))
11952 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11955 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11958 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11961 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11964 gcc_unreachable ();
11969 switch (GET_MODE (operands[0]))
11972 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11975 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11978 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11981 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11984 gcc_unreachable ();
11987 return 1; /* DONE */
11991 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11992 works for floating pointer parameters and nonoffsetable memories.
11993 For pushes, it returns just stack offsets; the values will be saved
11994 in the right order. Maximally three parts are generated. */
11997 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12002 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12004 size = (GET_MODE_SIZE (mode) + 4) / 8;
12006 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12007 gcc_assert (size >= 2 && size <= 3);
12009 /* Optimize constant pool reference to immediates. This is used by fp
12010 moves, that force all constants to memory to allow combining. */
12011 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12013 rtx tmp = maybe_get_pool_constant (operand);
12018 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12020 /* The only non-offsetable memories we handle are pushes. */
12021 int ok = push_operand (operand, VOIDmode);
12025 operand = copy_rtx (operand);
12026 PUT_MODE (operand, Pmode);
12027 parts[0] = parts[1] = parts[2] = operand;
12031 if (GET_CODE (operand) == CONST_VECTOR)
12033 enum machine_mode imode = int_mode_for_mode (mode);
12034 /* Caution: if we looked through a constant pool memory above,
12035 the operand may actually have a different mode now. That's
12036 ok, since we want to pun this all the way back to an integer. */
12037 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12038 gcc_assert (operand != NULL);
12044 if (mode == DImode)
12045 split_di (&operand, 1, &parts[0], &parts[1]);
12048 if (REG_P (operand))
12050 gcc_assert (reload_completed);
12051 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12052 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12054 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12056 else if (offsettable_memref_p (operand))
12058 operand = adjust_address (operand, SImode, 0);
12059 parts[0] = operand;
12060 parts[1] = adjust_address (operand, SImode, 4);
12062 parts[2] = adjust_address (operand, SImode, 8);
12064 else if (GET_CODE (operand) == CONST_DOUBLE)
12069 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12073 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12074 parts[2] = gen_int_mode (l[2], SImode);
12077 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12080 gcc_unreachable ();
12082 parts[1] = gen_int_mode (l[1], SImode);
12083 parts[0] = gen_int_mode (l[0], SImode);
12086 gcc_unreachable ();
12091 if (mode == TImode)
12092 split_ti (&operand, 1, &parts[0], &parts[1]);
12093 if (mode == XFmode || mode == TFmode)
12095 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12096 if (REG_P (operand))
12098 gcc_assert (reload_completed);
12099 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12100 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12102 else if (offsettable_memref_p (operand))
12104 operand = adjust_address (operand, DImode, 0);
12105 parts[0] = operand;
12106 parts[1] = adjust_address (operand, upper_mode, 8);
12108 else if (GET_CODE (operand) == CONST_DOUBLE)
12113 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12114 real_to_target (l, &r, mode);
12116 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12117 if (HOST_BITS_PER_WIDE_INT >= 64)
12120 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12121 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12124 parts[0] = immed_double_const (l[0], l[1], DImode);
12126 if (upper_mode == SImode)
12127 parts[1] = gen_int_mode (l[2], SImode);
12128 else if (HOST_BITS_PER_WIDE_INT >= 64)
12131 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12132 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12135 parts[1] = immed_double_const (l[2], l[3], DImode);
12138 gcc_unreachable ();
12145 /* Emit insns to perform a move or push of DI, DF, and XF values.
12146 Return false when normal moves are needed; true when all required
12147 insns have been emitted. Operands 2-4 contain the input values
12148 int the correct order; operands 5-7 contain the output values. */
12151 ix86_split_long_move (rtx operands[])
12156 int collisions = 0;
12157 enum machine_mode mode = GET_MODE (operands[0]);
12159 /* The DFmode expanders may ask us to move double.
12160 For 64bit target this is single move. By hiding the fact
12161 here we simplify i386.md splitters. */
12162 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12164 /* Optimize constant pool reference to immediates. This is used by
12165 fp moves, that force all constants to memory to allow combining. */
12167 if (GET_CODE (operands[1]) == MEM
12168 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12169 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12170 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12171 if (push_operand (operands[0], VOIDmode))
12173 operands[0] = copy_rtx (operands[0]);
12174 PUT_MODE (operands[0], Pmode);
12177 operands[0] = gen_lowpart (DImode, operands[0]);
12178 operands[1] = gen_lowpart (DImode, operands[1]);
12179 emit_move_insn (operands[0], operands[1]);
12183 /* The only non-offsettable memory we handle is push. */
12184 if (push_operand (operands[0], VOIDmode))
12187 gcc_assert (GET_CODE (operands[0]) != MEM
12188 || offsettable_memref_p (operands[0]));
12190 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12191 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12193 /* When emitting push, take care for source operands on the stack. */
12194 if (push && GET_CODE (operands[1]) == MEM
12195 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12198 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12199 XEXP (part[1][2], 0));
12200 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12201 XEXP (part[1][1], 0));
12204 /* We need to do copy in the right order in case an address register
12205 of the source overlaps the destination. */
12206 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12208 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12210 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12213 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12216 /* Collision in the middle part can be handled by reordering. */
12217 if (collisions == 1 && nparts == 3
12218 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12221 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12222 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12225 /* If there are more collisions, we can't handle it by reordering.
12226 Do an lea to the last part and use only one colliding move. */
12227 else if (collisions > 1)
12233 base = part[0][nparts - 1];
12235 /* Handle the case when the last part isn't valid for lea.
12236 Happens in 64-bit mode storing the 12-byte XFmode. */
12237 if (GET_MODE (base) != Pmode)
12238 base = gen_rtx_REG (Pmode, REGNO (base));
12240 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12241 part[1][0] = replace_equiv_address (part[1][0], base);
12242 part[1][1] = replace_equiv_address (part[1][1],
12243 plus_constant (base, UNITS_PER_WORD));
12245 part[1][2] = replace_equiv_address (part[1][2],
12246 plus_constant (base, 8));
12256 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12257 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12258 emit_move_insn (part[0][2], part[1][2]);
12263 /* In 64bit mode we don't have 32bit push available. In case this is
12264 register, it is OK - we will just use larger counterpart. We also
12265 retype memory - these comes from attempt to avoid REX prefix on
12266 moving of second half of TFmode value. */
12267 if (GET_MODE (part[1][1]) == SImode)
12269 switch (GET_CODE (part[1][1]))
12272 part[1][1] = adjust_address (part[1][1], DImode, 0);
12276 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12280 gcc_unreachable ();
12283 if (GET_MODE (part[1][0]) == SImode)
12284 part[1][0] = part[1][1];
12287 emit_move_insn (part[0][1], part[1][1]);
12288 emit_move_insn (part[0][0], part[1][0]);
12292 /* Choose correct order to not overwrite the source before it is copied. */
12293 if ((REG_P (part[0][0])
12294 && REG_P (part[1][1])
12295 && (REGNO (part[0][0]) == REGNO (part[1][1])
12297 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12299 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12303 operands[2] = part[0][2];
12304 operands[3] = part[0][1];
12305 operands[4] = part[0][0];
12306 operands[5] = part[1][2];
12307 operands[6] = part[1][1];
12308 operands[7] = part[1][0];
12312 operands[2] = part[0][1];
12313 operands[3] = part[0][0];
12314 operands[5] = part[1][1];
12315 operands[6] = part[1][0];
12322 operands[2] = part[0][0];
12323 operands[3] = part[0][1];
12324 operands[4] = part[0][2];
12325 operands[5] = part[1][0];
12326 operands[6] = part[1][1];
12327 operands[7] = part[1][2];
12331 operands[2] = part[0][0];
12332 operands[3] = part[0][1];
12333 operands[5] = part[1][0];
12334 operands[6] = part[1][1];
12338 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12341 if (GET_CODE (operands[5]) == CONST_INT
12342 && operands[5] != const0_rtx
12343 && REG_P (operands[2]))
12345 if (GET_CODE (operands[6]) == CONST_INT
12346 && INTVAL (operands[6]) == INTVAL (operands[5]))
12347 operands[6] = operands[2];
12350 && GET_CODE (operands[7]) == CONST_INT
12351 && INTVAL (operands[7]) == INTVAL (operands[5]))
12352 operands[7] = operands[2];
12356 && GET_CODE (operands[6]) == CONST_INT
12357 && operands[6] != const0_rtx
12358 && REG_P (operands[3])
12359 && GET_CODE (operands[7]) == CONST_INT
12360 && INTVAL (operands[7]) == INTVAL (operands[6]))
12361 operands[7] = operands[3];
12364 emit_move_insn (operands[2], operands[5]);
12365 emit_move_insn (operands[3], operands[6]);
12367 emit_move_insn (operands[4], operands[7]);
12372 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12373 left shift by a constant, either using a single shift or
12374 a sequence of add instructions. */
12377 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12381 emit_insn ((mode == DImode
12383 : gen_adddi3) (operand, operand, operand));
12385 else if (!optimize_size
12386 && count * ix86_cost->add <= ix86_cost->shift_const)
12389 for (i=0; i<count; i++)
12391 emit_insn ((mode == DImode
12393 : gen_adddi3) (operand, operand, operand));
12397 emit_insn ((mode == DImode
12399 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12403 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12405 rtx low[2], high[2];
12407 const int single_width = mode == DImode ? 32 : 64;
12409 if (GET_CODE (operands[2]) == CONST_INT)
12411 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12412 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12414 if (count >= single_width)
12416 emit_move_insn (high[0], low[1]);
12417 emit_move_insn (low[0], const0_rtx);
12419 if (count > single_width)
12420 ix86_expand_ashl_const (high[0], count - single_width, mode);
12424 if (!rtx_equal_p (operands[0], operands[1]))
12425 emit_move_insn (operands[0], operands[1]);
12426 emit_insn ((mode == DImode
12428 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12429 ix86_expand_ashl_const (low[0], count, mode);
12434 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12436 if (operands[1] == const1_rtx)
12438 /* Assuming we've chosen a QImode capable registers, then 1 << N
12439 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12440 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12442 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12444 ix86_expand_clear (low[0]);
12445 ix86_expand_clear (high[0]);
12446 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12448 d = gen_lowpart (QImode, low[0]);
12449 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12450 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12451 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12453 d = gen_lowpart (QImode, high[0]);
12454 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12455 s = gen_rtx_NE (QImode, flags, const0_rtx);
12456 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12459 /* Otherwise, we can get the same results by manually performing
12460 a bit extract operation on bit 5/6, and then performing the two
12461 shifts. The two methods of getting 0/1 into low/high are exactly
12462 the same size. Avoiding the shift in the bit extract case helps
12463 pentium4 a bit; no one else seems to care much either way. */
12468 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12469 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12471 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12472 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12474 emit_insn ((mode == DImode
12476 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12477 emit_insn ((mode == DImode
12479 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12480 emit_move_insn (low[0], high[0]);
12481 emit_insn ((mode == DImode
12483 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12486 emit_insn ((mode == DImode
12488 : gen_ashldi3) (low[0], low[0], operands[2]));
12489 emit_insn ((mode == DImode
12491 : gen_ashldi3) (high[0], high[0], operands[2]));
12495 if (operands[1] == constm1_rtx)
12497 /* For -1 << N, we can avoid the shld instruction, because we
12498 know that we're shifting 0...31/63 ones into a -1. */
12499 emit_move_insn (low[0], constm1_rtx);
12501 emit_move_insn (high[0], low[0]);
12503 emit_move_insn (high[0], constm1_rtx);
12507 if (!rtx_equal_p (operands[0], operands[1]))
12508 emit_move_insn (operands[0], operands[1]);
12510 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12511 emit_insn ((mode == DImode
12513 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12516 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12518 if (TARGET_CMOVE && scratch)
12520 ix86_expand_clear (scratch);
12521 emit_insn ((mode == DImode
12522 ? gen_x86_shift_adj_1
12523 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12526 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12530 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12532 rtx low[2], high[2];
12534 const int single_width = mode == DImode ? 32 : 64;
12536 if (GET_CODE (operands[2]) == CONST_INT)
12538 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12539 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12541 if (count == single_width * 2 - 1)
12543 emit_move_insn (high[0], high[1]);
12544 emit_insn ((mode == DImode
12546 : gen_ashrdi3) (high[0], high[0],
12547 GEN_INT (single_width - 1)));
12548 emit_move_insn (low[0], high[0]);
12551 else if (count >= single_width)
12553 emit_move_insn (low[0], high[1]);
12554 emit_move_insn (high[0], low[0]);
12555 emit_insn ((mode == DImode
12557 : gen_ashrdi3) (high[0], high[0],
12558 GEN_INT (single_width - 1)));
12559 if (count > single_width)
12560 emit_insn ((mode == DImode
12562 : gen_ashrdi3) (low[0], low[0],
12563 GEN_INT (count - single_width)));
12567 if (!rtx_equal_p (operands[0], operands[1]))
12568 emit_move_insn (operands[0], operands[1]);
12569 emit_insn ((mode == DImode
12571 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12572 emit_insn ((mode == DImode
12574 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12579 if (!rtx_equal_p (operands[0], operands[1]))
12580 emit_move_insn (operands[0], operands[1]);
12582 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12584 emit_insn ((mode == DImode
12586 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12587 emit_insn ((mode == DImode
12589 : gen_ashrdi3) (high[0], high[0], operands[2]));
12591 if (TARGET_CMOVE && scratch)
12593 emit_move_insn (scratch, high[0]);
12594 emit_insn ((mode == DImode
12596 : gen_ashrdi3) (scratch, scratch,
12597 GEN_INT (single_width - 1)));
12598 emit_insn ((mode == DImode
12599 ? gen_x86_shift_adj_1
12600 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12604 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12609 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12611 rtx low[2], high[2];
12613 const int single_width = mode == DImode ? 32 : 64;
12615 if (GET_CODE (operands[2]) == CONST_INT)
12617 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12618 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12620 if (count >= single_width)
12622 emit_move_insn (low[0], high[1]);
12623 ix86_expand_clear (high[0]);
12625 if (count > single_width)
12626 emit_insn ((mode == DImode
12628 : gen_lshrdi3) (low[0], low[0],
12629 GEN_INT (count - single_width)));
12633 if (!rtx_equal_p (operands[0], operands[1]))
12634 emit_move_insn (operands[0], operands[1]);
12635 emit_insn ((mode == DImode
12637 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12638 emit_insn ((mode == DImode
12640 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12645 if (!rtx_equal_p (operands[0], operands[1]))
12646 emit_move_insn (operands[0], operands[1]);
12648 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12650 emit_insn ((mode == DImode
12652 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12653 emit_insn ((mode == DImode
12655 : gen_lshrdi3) (high[0], high[0], operands[2]));
12657 /* Heh. By reversing the arguments, we can reuse this pattern. */
12658 if (TARGET_CMOVE && scratch)
12660 ix86_expand_clear (scratch);
12661 emit_insn ((mode == DImode
12662 ? gen_x86_shift_adj_1
12663 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12667 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12671 /* Helper function for the string operations below. Dest VARIABLE whether
12672 it is aligned to VALUE bytes. If true, jump to the label. */
12674 ix86_expand_aligntest (rtx variable, int value)
12676 rtx label = gen_label_rtx ();
12677 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12678 if (GET_MODE (variable) == DImode)
12679 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12681 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12682 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12687 /* Adjust COUNTER by the VALUE. */
12689 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12691 if (GET_MODE (countreg) == DImode)
12692 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12694 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12697 /* Zero extend possibly SImode EXP to Pmode register. */
12699 ix86_zero_extend_to_Pmode (rtx exp)
12702 if (GET_MODE (exp) == VOIDmode)
12703 return force_reg (Pmode, exp);
12704 if (GET_MODE (exp) == Pmode)
12705 return copy_to_mode_reg (Pmode, exp);
12706 r = gen_reg_rtx (Pmode);
12707 emit_insn (gen_zero_extendsidi2 (r, exp));
12711 /* Expand string move (memcpy) operation. Use i386 string operations when
12712 profitable. expand_clrmem contains similar code. */
12714 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12716 rtx srcreg, destreg, countreg, srcexp, destexp;
12717 enum machine_mode counter_mode;
12718 HOST_WIDE_INT align = 0;
12719 unsigned HOST_WIDE_INT count = 0;
12721 if (GET_CODE (align_exp) == CONST_INT)
12722 align = INTVAL (align_exp);
12724 /* Can't use any of this if the user has appropriated esi or edi. */
12725 if (global_regs[4] || global_regs[5])
12728 /* This simple hack avoids all inlining code and simplifies code below. */
12729 if (!TARGET_ALIGN_STRINGOPS)
12732 if (GET_CODE (count_exp) == CONST_INT)
12734 count = INTVAL (count_exp);
12735 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12739 /* Figure out proper mode for counter. For 32bits it is always SImode,
12740 for 64bits use SImode when possible, otherwise DImode.
12741 Set count to number of bytes copied when known at compile time. */
12743 || GET_MODE (count_exp) == SImode
12744 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12745 counter_mode = SImode;
12747 counter_mode = DImode;
12749 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12751 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12752 if (destreg != XEXP (dst, 0))
12753 dst = replace_equiv_address_nv (dst, destreg);
12754 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12755 if (srcreg != XEXP (src, 0))
12756 src = replace_equiv_address_nv (src, srcreg);
12758 /* When optimizing for size emit simple rep ; movsb instruction for
12759 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12760 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12761 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12762 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12763 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12764 known to be zero or not. The rep; movsb sequence causes higher
12765 register pressure though, so take that into account. */
12767 if ((!optimize || optimize_size)
12772 || (count & 3) + count / 4 > 6))))
12774 emit_insn (gen_cld ());
12775 countreg = ix86_zero_extend_to_Pmode (count_exp);
12776 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12777 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12778 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12782 /* For constant aligned (or small unaligned) copies use rep movsl
12783 followed by code copying the rest. For PentiumPro ensure 8 byte
12784 alignment to allow rep movsl acceleration. */
12786 else if (count != 0
12788 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12789 || optimize_size || count < (unsigned int) 64))
12791 unsigned HOST_WIDE_INT offset = 0;
12792 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12793 rtx srcmem, dstmem;
12795 emit_insn (gen_cld ());
12796 if (count & ~(size - 1))
12798 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12800 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12802 while (offset < (count & ~(size - 1)))
12804 srcmem = adjust_automodify_address_nv (src, movs_mode,
12806 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12808 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12814 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12815 & (TARGET_64BIT ? -1 : 0x3fffffff));
12816 countreg = copy_to_mode_reg (counter_mode, countreg);
12817 countreg = ix86_zero_extend_to_Pmode (countreg);
12819 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12820 GEN_INT (size == 4 ? 2 : 3));
12821 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12822 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12824 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12825 countreg, destexp, srcexp));
12826 offset = count & ~(size - 1);
12829 if (size == 8 && (count & 0x04))
12831 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12833 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12835 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12840 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12842 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12844 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12849 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12851 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12853 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12856 /* The generic code based on the glibc implementation:
12857 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12858 allowing accelerated copying there)
12859 - copy the data using rep movsl
12860 - copy the rest. */
12865 rtx srcmem, dstmem;
12866 int desired_alignment = (TARGET_PENTIUMPRO
12867 && (count == 0 || count >= (unsigned int) 260)
12868 ? 8 : UNITS_PER_WORD);
12869 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12870 dst = change_address (dst, BLKmode, destreg);
12871 src = change_address (src, BLKmode, srcreg);
12873 /* In case we don't know anything about the alignment, default to
12874 library version, since it is usually equally fast and result in
12877 Also emit call when we know that the count is large and call overhead
12878 will not be important. */
12879 if (!TARGET_INLINE_ALL_STRINGOPS
12880 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12883 if (TARGET_SINGLE_STRINGOP)
12884 emit_insn (gen_cld ());
12886 countreg2 = gen_reg_rtx (Pmode);
12887 countreg = copy_to_mode_reg (counter_mode, count_exp);
12889 /* We don't use loops to align destination and to copy parts smaller
12890 than 4 bytes, because gcc is able to optimize such code better (in
12891 the case the destination or the count really is aligned, gcc is often
12892 able to predict the branches) and also it is friendlier to the
12893 hardware branch prediction.
12895 Using loops is beneficial for generic case, because we can
12896 handle small counts using the loops. Many CPUs (such as Athlon)
12897 have large REP prefix setup costs.
12899 This is quite costly. Maybe we can revisit this decision later or
12900 add some customizability to this code. */
12902 if (count == 0 && align < desired_alignment)
12904 label = gen_label_rtx ();
12905 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12906 LEU, 0, counter_mode, 1, label);
12910 rtx label = ix86_expand_aligntest (destreg, 1);
12911 srcmem = change_address (src, QImode, srcreg);
12912 dstmem = change_address (dst, QImode, destreg);
12913 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12914 ix86_adjust_counter (countreg, 1);
12915 emit_label (label);
12916 LABEL_NUSES (label) = 1;
12920 rtx label = ix86_expand_aligntest (destreg, 2);
12921 srcmem = change_address (src, HImode, srcreg);
12922 dstmem = change_address (dst, HImode, destreg);
12923 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12924 ix86_adjust_counter (countreg, 2);
12925 emit_label (label);
12926 LABEL_NUSES (label) = 1;
12928 if (align <= 4 && desired_alignment > 4)
12930 rtx label = ix86_expand_aligntest (destreg, 4);
12931 srcmem = change_address (src, SImode, srcreg);
12932 dstmem = change_address (dst, SImode, destreg);
12933 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12934 ix86_adjust_counter (countreg, 4);
12935 emit_label (label);
12936 LABEL_NUSES (label) = 1;
12939 if (label && desired_alignment > 4 && !TARGET_64BIT)
12941 emit_label (label);
12942 LABEL_NUSES (label) = 1;
12945 if (!TARGET_SINGLE_STRINGOP)
12946 emit_insn (gen_cld ());
12949 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12951 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12955 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12956 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12958 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12959 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12960 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12961 countreg2, destexp, srcexp));
12965 emit_label (label);
12966 LABEL_NUSES (label) = 1;
12968 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12970 srcmem = change_address (src, SImode, srcreg);
12971 dstmem = change_address (dst, SImode, destreg);
12972 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12974 if ((align <= 4 || count == 0) && TARGET_64BIT)
12976 rtx label = ix86_expand_aligntest (countreg, 4);
12977 srcmem = change_address (src, SImode, srcreg);
12978 dstmem = change_address (dst, SImode, destreg);
12979 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12980 emit_label (label);
12981 LABEL_NUSES (label) = 1;
12983 if (align > 2 && count != 0 && (count & 2))
12985 srcmem = change_address (src, HImode, srcreg);
12986 dstmem = change_address (dst, HImode, destreg);
12987 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12989 if (align <= 2 || count == 0)
12991 rtx label = ix86_expand_aligntest (countreg, 2);
12992 srcmem = change_address (src, HImode, srcreg);
12993 dstmem = change_address (dst, HImode, destreg);
12994 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12995 emit_label (label);
12996 LABEL_NUSES (label) = 1;
12998 if (align > 1 && count != 0 && (count & 1))
13000 srcmem = change_address (src, QImode, srcreg);
13001 dstmem = change_address (dst, QImode, destreg);
13002 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13004 if (align <= 1 || count == 0)
13006 rtx label = ix86_expand_aligntest (countreg, 1);
13007 srcmem = change_address (src, QImode, srcreg);
13008 dstmem = change_address (dst, QImode, destreg);
13009 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13010 emit_label (label);
13011 LABEL_NUSES (label) = 1;
13018 /* Expand string clear operation (bzero). Use i386 string operations when
13019 profitable. expand_movmem contains similar code. */
13021 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13023 rtx destreg, zeroreg, countreg, destexp;
13024 enum machine_mode counter_mode;
13025 HOST_WIDE_INT align = 0;
13026 unsigned HOST_WIDE_INT count = 0;
13028 if (GET_CODE (align_exp) == CONST_INT)
13029 align = INTVAL (align_exp);
13031 /* Can't use any of this if the user has appropriated esi. */
13032 if (global_regs[4])
13035 /* This simple hack avoids all inlining code and simplifies code below. */
13036 if (!TARGET_ALIGN_STRINGOPS)
13039 if (GET_CODE (count_exp) == CONST_INT)
13041 count = INTVAL (count_exp);
13042 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13045 /* Figure out proper mode for counter. For 32bits it is always SImode,
13046 for 64bits use SImode when possible, otherwise DImode.
13047 Set count to number of bytes copied when known at compile time. */
13049 || GET_MODE (count_exp) == SImode
13050 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13051 counter_mode = SImode;
13053 counter_mode = DImode;
13055 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13056 if (destreg != XEXP (dst, 0))
13057 dst = replace_equiv_address_nv (dst, destreg);
13060 /* When optimizing for size emit simple rep ; movsb instruction for
13061 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13062 sequence is 7 bytes long, so if optimizing for size and count is
13063 small enough that some stosl, stosw and stosb instructions without
13064 rep are shorter, fall back into the next if. */
13066 if ((!optimize || optimize_size)
13069 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13071 emit_insn (gen_cld ());
13073 countreg = ix86_zero_extend_to_Pmode (count_exp);
13074 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13075 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13076 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13078 else if (count != 0
13080 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13081 || optimize_size || count < (unsigned int) 64))
13083 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13084 unsigned HOST_WIDE_INT offset = 0;
13086 emit_insn (gen_cld ());
13088 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13089 if (count & ~(size - 1))
13091 unsigned HOST_WIDE_INT repcount;
13092 unsigned int max_nonrep;
13094 repcount = count >> (size == 4 ? 2 : 3);
13096 repcount &= 0x3fffffff;
13098 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13099 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13100 bytes. In both cases the latter seems to be faster for small
13102 max_nonrep = size == 4 ? 7 : 4;
13103 if (!optimize_size)
13106 case PROCESSOR_PENTIUM4:
13107 case PROCESSOR_NOCONA:
13114 if (repcount <= max_nonrep)
13115 while (repcount-- > 0)
13117 rtx mem = adjust_automodify_address_nv (dst,
13118 GET_MODE (zeroreg),
13120 emit_insn (gen_strset (destreg, mem, zeroreg));
13125 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13126 countreg = ix86_zero_extend_to_Pmode (countreg);
13127 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13128 GEN_INT (size == 4 ? 2 : 3));
13129 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13130 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13132 offset = count & ~(size - 1);
13135 if (size == 8 && (count & 0x04))
13137 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13139 emit_insn (gen_strset (destreg, mem,
13140 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13145 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13147 emit_insn (gen_strset (destreg, mem,
13148 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13153 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13155 emit_insn (gen_strset (destreg, mem,
13156 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13163 /* Compute desired alignment of the string operation. */
13164 int desired_alignment = (TARGET_PENTIUMPRO
13165 && (count == 0 || count >= (unsigned int) 260)
13166 ? 8 : UNITS_PER_WORD);
13168 /* In case we don't know anything about the alignment, default to
13169 library version, since it is usually equally fast and result in
13172 Also emit call when we know that the count is large and call overhead
13173 will not be important. */
13174 if (!TARGET_INLINE_ALL_STRINGOPS
13175 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13178 if (TARGET_SINGLE_STRINGOP)
13179 emit_insn (gen_cld ());
13181 countreg2 = gen_reg_rtx (Pmode);
13182 countreg = copy_to_mode_reg (counter_mode, count_exp);
13183 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13184 /* Get rid of MEM_OFFSET, it won't be accurate. */
13185 dst = change_address (dst, BLKmode, destreg);
13187 if (count == 0 && align < desired_alignment)
13189 label = gen_label_rtx ();
13190 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13191 LEU, 0, counter_mode, 1, label);
13195 rtx label = ix86_expand_aligntest (destreg, 1);
13196 emit_insn (gen_strset (destreg, dst,
13197 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13198 ix86_adjust_counter (countreg, 1);
13199 emit_label (label);
13200 LABEL_NUSES (label) = 1;
13204 rtx label = ix86_expand_aligntest (destreg, 2);
13205 emit_insn (gen_strset (destreg, dst,
13206 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13207 ix86_adjust_counter (countreg, 2);
13208 emit_label (label);
13209 LABEL_NUSES (label) = 1;
13211 if (align <= 4 && desired_alignment > 4)
13213 rtx label = ix86_expand_aligntest (destreg, 4);
13214 emit_insn (gen_strset (destreg, dst,
13216 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13218 ix86_adjust_counter (countreg, 4);
13219 emit_label (label);
13220 LABEL_NUSES (label) = 1;
13223 if (label && desired_alignment > 4 && !TARGET_64BIT)
13225 emit_label (label);
13226 LABEL_NUSES (label) = 1;
13230 if (!TARGET_SINGLE_STRINGOP)
13231 emit_insn (gen_cld ());
13234 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13236 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13240 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13241 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13243 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13244 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13248 emit_label (label);
13249 LABEL_NUSES (label) = 1;
13252 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13253 emit_insn (gen_strset (destreg, dst,
13254 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13255 if (TARGET_64BIT && (align <= 4 || count == 0))
13257 rtx label = ix86_expand_aligntest (countreg, 4);
13258 emit_insn (gen_strset (destreg, dst,
13259 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13260 emit_label (label);
13261 LABEL_NUSES (label) = 1;
13263 if (align > 2 && count != 0 && (count & 2))
13264 emit_insn (gen_strset (destreg, dst,
13265 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13266 if (align <= 2 || count == 0)
13268 rtx label = ix86_expand_aligntest (countreg, 2);
13269 emit_insn (gen_strset (destreg, dst,
13270 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13271 emit_label (label);
13272 LABEL_NUSES (label) = 1;
13274 if (align > 1 && count != 0 && (count & 1))
13275 emit_insn (gen_strset (destreg, dst,
13276 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13277 if (align <= 1 || count == 0)
13279 rtx label = ix86_expand_aligntest (countreg, 1);
13280 emit_insn (gen_strset (destreg, dst,
13281 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13282 emit_label (label);
13283 LABEL_NUSES (label) = 1;
13289 /* Expand strlen. */
13291 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13293 rtx addr, scratch1, scratch2, scratch3, scratch4;
13295 /* The generic case of strlen expander is long. Avoid it's
13296 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13298 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13299 && !TARGET_INLINE_ALL_STRINGOPS
13301 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13304 addr = force_reg (Pmode, XEXP (src, 0));
13305 scratch1 = gen_reg_rtx (Pmode);
13307 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13310 /* Well it seems that some optimizer does not combine a call like
13311 foo(strlen(bar), strlen(bar));
13312 when the move and the subtraction is done here. It does calculate
13313 the length just once when these instructions are done inside of
13314 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13315 often used and I use one fewer register for the lifetime of
13316 output_strlen_unroll() this is better. */
13318 emit_move_insn (out, addr);
13320 ix86_expand_strlensi_unroll_1 (out, src, align);
13322 /* strlensi_unroll_1 returns the address of the zero at the end of
13323 the string, like memchr(), so compute the length by subtracting
13324 the start address. */
13326 emit_insn (gen_subdi3 (out, out, addr));
13328 emit_insn (gen_subsi3 (out, out, addr));
13333 scratch2 = gen_reg_rtx (Pmode);
13334 scratch3 = gen_reg_rtx (Pmode);
13335 scratch4 = force_reg (Pmode, constm1_rtx);
13337 emit_move_insn (scratch3, addr);
13338 eoschar = force_reg (QImode, eoschar);
13340 emit_insn (gen_cld ());
13341 src = replace_equiv_address_nv (src, scratch3);
13343 /* If .md starts supporting :P, this can be done in .md. */
13344 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13345 scratch4), UNSPEC_SCAS);
13346 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13349 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13350 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13354 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13355 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13361 /* Expand the appropriate insns for doing strlen if not just doing
13364 out = result, initialized with the start address
13365 align_rtx = alignment of the address.
13366 scratch = scratch register, initialized with the startaddress when
13367 not aligned, otherwise undefined
13369 This is just the body. It needs the initializations mentioned above and
13370 some address computing at the end. These things are done in i386.md. */
13373 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13377 rtx align_2_label = NULL_RTX;
13378 rtx align_3_label = NULL_RTX;
13379 rtx align_4_label = gen_label_rtx ();
13380 rtx end_0_label = gen_label_rtx ();
13382 rtx tmpreg = gen_reg_rtx (SImode);
13383 rtx scratch = gen_reg_rtx (SImode);
13387 if (GET_CODE (align_rtx) == CONST_INT)
13388 align = INTVAL (align_rtx);
13390 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13392 /* Is there a known alignment and is it less than 4? */
13395 rtx scratch1 = gen_reg_rtx (Pmode);
13396 emit_move_insn (scratch1, out);
13397 /* Is there a known alignment and is it not 2? */
13400 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13401 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13403 /* Leave just the 3 lower bits. */
13404 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13405 NULL_RTX, 0, OPTAB_WIDEN);
13407 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13408 Pmode, 1, align_4_label);
13409 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13410 Pmode, 1, align_2_label);
13411 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13412 Pmode, 1, align_3_label);
13416 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13417 check if is aligned to 4 - byte. */
13419 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13420 NULL_RTX, 0, OPTAB_WIDEN);
13422 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13423 Pmode, 1, align_4_label);
13426 mem = change_address (src, QImode, out);
13428 /* Now compare the bytes. */
13430 /* Compare the first n unaligned byte on a byte per byte basis. */
13431 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13432 QImode, 1, end_0_label);
13434 /* Increment the address. */
13436 emit_insn (gen_adddi3 (out, out, const1_rtx));
13438 emit_insn (gen_addsi3 (out, out, const1_rtx));
13440 /* Not needed with an alignment of 2 */
13443 emit_label (align_2_label);
13445 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13449 emit_insn (gen_adddi3 (out, out, const1_rtx));
13451 emit_insn (gen_addsi3 (out, out, const1_rtx));
13453 emit_label (align_3_label);
13456 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13460 emit_insn (gen_adddi3 (out, out, const1_rtx));
13462 emit_insn (gen_addsi3 (out, out, const1_rtx));
13465 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13466 align this loop. It gives only huge programs, but does not help to
13468 emit_label (align_4_label);
13470 mem = change_address (src, SImode, out);
13471 emit_move_insn (scratch, mem);
13473 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13475 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13477 /* This formula yields a nonzero result iff one of the bytes is zero.
13478 This saves three branches inside loop and many cycles. */
13480 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13481 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13482 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13483 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13484 gen_int_mode (0x80808080, SImode)));
13485 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13490 rtx reg = gen_reg_rtx (SImode);
13491 rtx reg2 = gen_reg_rtx (Pmode);
13492 emit_move_insn (reg, tmpreg);
13493 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13495 /* If zero is not in the first two bytes, move two bytes forward. */
13496 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13497 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13498 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13499 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13500 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13503 /* Emit lea manually to avoid clobbering of flags. */
13504 emit_insn (gen_rtx_SET (SImode, reg2,
13505 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13507 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13508 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13509 emit_insn (gen_rtx_SET (VOIDmode, out,
13510 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13517 rtx end_2_label = gen_label_rtx ();
13518 /* Is zero in the first two bytes? */
13520 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13521 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13522 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13523 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13524 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13526 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13527 JUMP_LABEL (tmp) = end_2_label;
13529 /* Not in the first two. Move two bytes forward. */
13530 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13532 emit_insn (gen_adddi3 (out, out, const2_rtx));
13534 emit_insn (gen_addsi3 (out, out, const2_rtx));
13536 emit_label (end_2_label);
13540 /* Avoid branch in fixing the byte. */
13541 tmpreg = gen_lowpart (QImode, tmpreg);
13542 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13543 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13545 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13547 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13549 emit_label (end_0_label);
13553 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13554 rtx callarg2 ATTRIBUTE_UNUSED,
13555 rtx pop, int sibcall)
13557 rtx use = NULL, call;
13559 if (pop == const0_rtx)
13561 gcc_assert (!TARGET_64BIT || !pop);
13563 if (TARGET_MACHO && !TARGET_64BIT)
13566 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13567 fnaddr = machopic_indirect_call_target (fnaddr);
13572 /* Static functions and indirect calls don't need the pic register. */
13573 if (! TARGET_64BIT && flag_pic
13574 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13575 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13576 use_reg (&use, pic_offset_table_rtx);
13579 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13581 rtx al = gen_rtx_REG (QImode, 0);
13582 emit_move_insn (al, callarg2);
13583 use_reg (&use, al);
13586 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13588 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13589 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13591 if (sibcall && TARGET_64BIT
13592 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13595 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13596 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13597 emit_move_insn (fnaddr, addr);
13598 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13601 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13603 call = gen_rtx_SET (VOIDmode, retval, call);
13606 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13607 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13608 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13611 call = emit_call_insn (call);
13613 CALL_INSN_FUNCTION_USAGE (call) = use;
13617 /* Clear stack slot assignments remembered from previous functions.
13618 This is called from INIT_EXPANDERS once before RTL is emitted for each
13621 static struct machine_function *
13622 ix86_init_machine_status (void)
13624 struct machine_function *f;
13626 f = ggc_alloc_cleared (sizeof (struct machine_function));
13627 f->use_fast_prologue_epilogue_nregs = -1;
13628 f->tls_descriptor_call_expanded_p = 0;
13633 /* Return a MEM corresponding to a stack slot with mode MODE.
13634 Allocate a new slot if necessary.
13636 The RTL for a function can have several slots available: N is
13637 which slot to use. */
13640 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13642 struct stack_local_entry *s;
13644 gcc_assert (n < MAX_386_STACK_LOCALS);
13646 for (s = ix86_stack_locals; s; s = s->next)
13647 if (s->mode == mode && s->n == n)
13648 return copy_rtx (s->rtl);
13650 s = (struct stack_local_entry *)
13651 ggc_alloc (sizeof (struct stack_local_entry));
13654 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13656 s->next = ix86_stack_locals;
13657 ix86_stack_locals = s;
13661 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13663 static GTY(()) rtx ix86_tls_symbol;
13665 ix86_tls_get_addr (void)
13668 if (!ix86_tls_symbol)
13670 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13671 (TARGET_ANY_GNU_TLS
13673 ? "___tls_get_addr"
13674 : "__tls_get_addr");
13677 return ix86_tls_symbol;
13680 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13682 static GTY(()) rtx ix86_tls_module_base_symbol;
13684 ix86_tls_module_base (void)
13687 if (!ix86_tls_module_base_symbol)
13689 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13690 "_TLS_MODULE_BASE_");
13691 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13692 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13695 return ix86_tls_module_base_symbol;
13698 /* Calculate the length of the memory address in the instruction
13699 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13702 memory_address_length (rtx addr)
13704 struct ix86_address parts;
13705 rtx base, index, disp;
13709 if (GET_CODE (addr) == PRE_DEC
13710 || GET_CODE (addr) == POST_INC
13711 || GET_CODE (addr) == PRE_MODIFY
13712 || GET_CODE (addr) == POST_MODIFY)
13715 ok = ix86_decompose_address (addr, &parts);
13718 if (parts.base && GET_CODE (parts.base) == SUBREG)
13719 parts.base = SUBREG_REG (parts.base);
13720 if (parts.index && GET_CODE (parts.index) == SUBREG)
13721 parts.index = SUBREG_REG (parts.index);
13724 index = parts.index;
13729 - esp as the base always wants an index,
13730 - ebp as the base always wants a displacement. */
13732 /* Register Indirect. */
13733 if (base && !index && !disp)
13735 /* esp (for its index) and ebp (for its displacement) need
13736 the two-byte modrm form. */
13737 if (addr == stack_pointer_rtx
13738 || addr == arg_pointer_rtx
13739 || addr == frame_pointer_rtx
13740 || addr == hard_frame_pointer_rtx)
13744 /* Direct Addressing. */
13745 else if (disp && !base && !index)
13750 /* Find the length of the displacement constant. */
13753 if (base && satisfies_constraint_K (disp))
13758 /* ebp always wants a displacement. */
13759 else if (base == hard_frame_pointer_rtx)
13762 /* An index requires the two-byte modrm form.... */
13764 /* ...like esp, which always wants an index. */
13765 || base == stack_pointer_rtx
13766 || base == arg_pointer_rtx
13767 || base == frame_pointer_rtx)
13774 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13775 is set, expect that insn have 8bit immediate alternative. */
13777 ix86_attr_length_immediate_default (rtx insn, int shortform)
13781 extract_insn_cached (insn);
13782 for (i = recog_data.n_operands - 1; i >= 0; --i)
13783 if (CONSTANT_P (recog_data.operand[i]))
13786 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13790 switch (get_attr_mode (insn))
13801 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13806 fatal_insn ("unknown insn mode", insn);
13812 /* Compute default value for "length_address" attribute. */
13814 ix86_attr_length_address_default (rtx insn)
13818 if (get_attr_type (insn) == TYPE_LEA)
13820 rtx set = PATTERN (insn);
13822 if (GET_CODE (set) == PARALLEL)
13823 set = XVECEXP (set, 0, 0);
13825 gcc_assert (GET_CODE (set) == SET);
13827 return memory_address_length (SET_SRC (set));
13830 extract_insn_cached (insn);
13831 for (i = recog_data.n_operands - 1; i >= 0; --i)
13832 if (GET_CODE (recog_data.operand[i]) == MEM)
13834 return memory_address_length (XEXP (recog_data.operand[i], 0));
13840 /* Return the maximum number of instructions a cpu can issue. */
13843 ix86_issue_rate (void)
13847 case PROCESSOR_PENTIUM:
13851 case PROCESSOR_PENTIUMPRO:
13852 case PROCESSOR_PENTIUM4:
13853 case PROCESSOR_ATHLON:
13855 case PROCESSOR_NOCONA:
13856 case PROCESSOR_GENERIC32:
13857 case PROCESSOR_GENERIC64:
13865 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13866 by DEP_INSN and nothing set by DEP_INSN. */
13869 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13873 /* Simplify the test for uninteresting insns. */
13874 if (insn_type != TYPE_SETCC
13875 && insn_type != TYPE_ICMOV
13876 && insn_type != TYPE_FCMOV
13877 && insn_type != TYPE_IBR)
13880 if ((set = single_set (dep_insn)) != 0)
13882 set = SET_DEST (set);
13885 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13886 && XVECLEN (PATTERN (dep_insn), 0) == 2
13887 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13888 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13890 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13891 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13896 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13899 /* This test is true if the dependent insn reads the flags but
13900 not any other potentially set register. */
13901 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13904 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13910 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13911 address with operands set by DEP_INSN. */
13914 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13918 if (insn_type == TYPE_LEA
13921 addr = PATTERN (insn);
13923 if (GET_CODE (addr) == PARALLEL)
13924 addr = XVECEXP (addr, 0, 0);
13926 gcc_assert (GET_CODE (addr) == SET);
13928 addr = SET_SRC (addr);
13933 extract_insn_cached (insn);
13934 for (i = recog_data.n_operands - 1; i >= 0; --i)
13935 if (GET_CODE (recog_data.operand[i]) == MEM)
13937 addr = XEXP (recog_data.operand[i], 0);
13944 return modified_in_p (addr, dep_insn);
13948 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13950 enum attr_type insn_type, dep_insn_type;
13951 enum attr_memory memory;
13953 int dep_insn_code_number;
13955 /* Anti and output dependencies have zero cost on all CPUs. */
13956 if (REG_NOTE_KIND (link) != 0)
13959 dep_insn_code_number = recog_memoized (dep_insn);
13961 /* If we can't recognize the insns, we can't really do anything. */
13962 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13965 insn_type = get_attr_type (insn);
13966 dep_insn_type = get_attr_type (dep_insn);
13970 case PROCESSOR_PENTIUM:
13971 /* Address Generation Interlock adds a cycle of latency. */
13972 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13975 /* ??? Compares pair with jump/setcc. */
13976 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13979 /* Floating point stores require value to be ready one cycle earlier. */
13980 if (insn_type == TYPE_FMOV
13981 && get_attr_memory (insn) == MEMORY_STORE
13982 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13986 case PROCESSOR_PENTIUMPRO:
13987 memory = get_attr_memory (insn);
13989 /* INT->FP conversion is expensive. */
13990 if (get_attr_fp_int_src (dep_insn))
13993 /* There is one cycle extra latency between an FP op and a store. */
13994 if (insn_type == TYPE_FMOV
13995 && (set = single_set (dep_insn)) != NULL_RTX
13996 && (set2 = single_set (insn)) != NULL_RTX
13997 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13998 && GET_CODE (SET_DEST (set2)) == MEM)
14001 /* Show ability of reorder buffer to hide latency of load by executing
14002 in parallel with previous instruction in case
14003 previous instruction is not needed to compute the address. */
14004 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14005 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14007 /* Claim moves to take one cycle, as core can issue one load
14008 at time and the next load can start cycle later. */
14009 if (dep_insn_type == TYPE_IMOV
14010 || dep_insn_type == TYPE_FMOV)
14018 memory = get_attr_memory (insn);
14020 /* The esp dependency is resolved before the instruction is really
14022 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14023 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14026 /* INT->FP conversion is expensive. */
14027 if (get_attr_fp_int_src (dep_insn))
14030 /* Show ability of reorder buffer to hide latency of load by executing
14031 in parallel with previous instruction in case
14032 previous instruction is not needed to compute the address. */
14033 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14034 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14036 /* Claim moves to take one cycle, as core can issue one load
14037 at time and the next load can start cycle later. */
14038 if (dep_insn_type == TYPE_IMOV
14039 || dep_insn_type == TYPE_FMOV)
14048 case PROCESSOR_ATHLON:
14050 case PROCESSOR_GENERIC32:
14051 case PROCESSOR_GENERIC64:
14052 memory = get_attr_memory (insn);
14054 /* Show ability of reorder buffer to hide latency of load by executing
14055 in parallel with previous instruction in case
14056 previous instruction is not needed to compute the address. */
14057 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14058 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14060 enum attr_unit unit = get_attr_unit (insn);
14063 /* Because of the difference between the length of integer and
14064 floating unit pipeline preparation stages, the memory operands
14065 for floating point are cheaper.
14067 ??? For Athlon it the difference is most probably 2. */
14068 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14071 loadcost = TARGET_ATHLON ? 2 : 0;
14073 if (cost >= loadcost)
14086 /* How many alternative schedules to try. This should be as wide as the
14087 scheduling freedom in the DFA, but no wider. Making this value too
14088 large results extra work for the scheduler. */
14091 ia32_multipass_dfa_lookahead (void)
14093 if (ix86_tune == PROCESSOR_PENTIUM)
14096 if (ix86_tune == PROCESSOR_PENTIUMPRO
14097 || ix86_tune == PROCESSOR_K6)
14105 /* Compute the alignment given to a constant that is being placed in memory.
14106 EXP is the constant and ALIGN is the alignment that the object would
14108 The value of this function is used instead of that alignment to align
14112 ix86_constant_alignment (tree exp, int align)
14114 if (TREE_CODE (exp) == REAL_CST)
14116 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14118 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14121 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14122 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14123 return BITS_PER_WORD;
14128 /* Compute the alignment for a static variable.
14129 TYPE is the data type, and ALIGN is the alignment that
14130 the object would ordinarily have. The value of this function is used
14131 instead of that alignment to align the object. */
14134 ix86_data_alignment (tree type, int align)
14136 int max_align = optimize_size ? BITS_PER_WORD : 256;
14138 if (AGGREGATE_TYPE_P (type)
14139 && TYPE_SIZE (type)
14140 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14141 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14142 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14143 && align < max_align)
14146 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14147 to 16byte boundary. */
14150 if (AGGREGATE_TYPE_P (type)
14151 && TYPE_SIZE (type)
14152 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14153 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14154 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14158 if (TREE_CODE (type) == ARRAY_TYPE)
14160 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14162 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14165 else if (TREE_CODE (type) == COMPLEX_TYPE)
14168 if (TYPE_MODE (type) == DCmode && align < 64)
14170 if (TYPE_MODE (type) == XCmode && align < 128)
14173 else if ((TREE_CODE (type) == RECORD_TYPE
14174 || TREE_CODE (type) == UNION_TYPE
14175 || TREE_CODE (type) == QUAL_UNION_TYPE)
14176 && TYPE_FIELDS (type))
14178 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14180 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14183 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14184 || TREE_CODE (type) == INTEGER_TYPE)
14186 if (TYPE_MODE (type) == DFmode && align < 64)
14188 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14195 /* Compute the alignment for a local variable.
14196 TYPE is the data type, and ALIGN is the alignment that
14197 the object would ordinarily have. The value of this macro is used
14198 instead of that alignment to align the object. */
14201 ix86_local_alignment (tree type, int align)
14203 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14204 to 16byte boundary. */
14207 if (AGGREGATE_TYPE_P (type)
14208 && TYPE_SIZE (type)
14209 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14210 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14211 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14214 if (TREE_CODE (type) == ARRAY_TYPE)
14216 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14218 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14221 else if (TREE_CODE (type) == COMPLEX_TYPE)
14223 if (TYPE_MODE (type) == DCmode && align < 64)
14225 if (TYPE_MODE (type) == XCmode && align < 128)
14228 else if ((TREE_CODE (type) == RECORD_TYPE
14229 || TREE_CODE (type) == UNION_TYPE
14230 || TREE_CODE (type) == QUAL_UNION_TYPE)
14231 && TYPE_FIELDS (type))
14233 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14235 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14238 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14239 || TREE_CODE (type) == INTEGER_TYPE)
14242 if (TYPE_MODE (type) == DFmode && align < 64)
14244 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14250 /* Emit RTL insns to initialize the variable parts of a trampoline.
14251 FNADDR is an RTX for the address of the function's pure code.
14252 CXT is an RTX for the static chain value for the function. */
14254 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14258 /* Compute offset from the end of the jmp to the target function. */
14259 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14260 plus_constant (tramp, 10),
14261 NULL_RTX, 1, OPTAB_DIRECT);
14262 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14263 gen_int_mode (0xb9, QImode));
14264 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14265 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14266 gen_int_mode (0xe9, QImode));
14267 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14272 /* Try to load address using shorter movl instead of movabs.
14273 We may want to support movq for kernel mode, but kernel does not use
14274 trampolines at the moment. */
14275 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14277 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14278 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14279 gen_int_mode (0xbb41, HImode));
14280 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14281 gen_lowpart (SImode, fnaddr));
14286 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14287 gen_int_mode (0xbb49, HImode));
14288 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14292 /* Load static chain using movabs to r10. */
14293 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14294 gen_int_mode (0xba49, HImode));
14295 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14298 /* Jump to the r11 */
14299 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14300 gen_int_mode (0xff49, HImode));
14301 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14302 gen_int_mode (0xe3, QImode));
14304 gcc_assert (offset <= TRAMPOLINE_SIZE);
14307 #ifdef ENABLE_EXECUTE_STACK
14308 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14309 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14313 /* Codes for all the SSE/MMX builtins. */
14316 IX86_BUILTIN_ADDPS,
14317 IX86_BUILTIN_ADDSS,
14318 IX86_BUILTIN_DIVPS,
14319 IX86_BUILTIN_DIVSS,
14320 IX86_BUILTIN_MULPS,
14321 IX86_BUILTIN_MULSS,
14322 IX86_BUILTIN_SUBPS,
14323 IX86_BUILTIN_SUBSS,
14325 IX86_BUILTIN_CMPEQPS,
14326 IX86_BUILTIN_CMPLTPS,
14327 IX86_BUILTIN_CMPLEPS,
14328 IX86_BUILTIN_CMPGTPS,
14329 IX86_BUILTIN_CMPGEPS,
14330 IX86_BUILTIN_CMPNEQPS,
14331 IX86_BUILTIN_CMPNLTPS,
14332 IX86_BUILTIN_CMPNLEPS,
14333 IX86_BUILTIN_CMPNGTPS,
14334 IX86_BUILTIN_CMPNGEPS,
14335 IX86_BUILTIN_CMPORDPS,
14336 IX86_BUILTIN_CMPUNORDPS,
14337 IX86_BUILTIN_CMPEQSS,
14338 IX86_BUILTIN_CMPLTSS,
14339 IX86_BUILTIN_CMPLESS,
14340 IX86_BUILTIN_CMPNEQSS,
14341 IX86_BUILTIN_CMPNLTSS,
14342 IX86_BUILTIN_CMPNLESS,
14343 IX86_BUILTIN_CMPNGTSS,
14344 IX86_BUILTIN_CMPNGESS,
14345 IX86_BUILTIN_CMPORDSS,
14346 IX86_BUILTIN_CMPUNORDSS,
14348 IX86_BUILTIN_COMIEQSS,
14349 IX86_BUILTIN_COMILTSS,
14350 IX86_BUILTIN_COMILESS,
14351 IX86_BUILTIN_COMIGTSS,
14352 IX86_BUILTIN_COMIGESS,
14353 IX86_BUILTIN_COMINEQSS,
14354 IX86_BUILTIN_UCOMIEQSS,
14355 IX86_BUILTIN_UCOMILTSS,
14356 IX86_BUILTIN_UCOMILESS,
14357 IX86_BUILTIN_UCOMIGTSS,
14358 IX86_BUILTIN_UCOMIGESS,
14359 IX86_BUILTIN_UCOMINEQSS,
14361 IX86_BUILTIN_CVTPI2PS,
14362 IX86_BUILTIN_CVTPS2PI,
14363 IX86_BUILTIN_CVTSI2SS,
14364 IX86_BUILTIN_CVTSI642SS,
14365 IX86_BUILTIN_CVTSS2SI,
14366 IX86_BUILTIN_CVTSS2SI64,
14367 IX86_BUILTIN_CVTTPS2PI,
14368 IX86_BUILTIN_CVTTSS2SI,
14369 IX86_BUILTIN_CVTTSS2SI64,
14371 IX86_BUILTIN_MAXPS,
14372 IX86_BUILTIN_MAXSS,
14373 IX86_BUILTIN_MINPS,
14374 IX86_BUILTIN_MINSS,
14376 IX86_BUILTIN_LOADUPS,
14377 IX86_BUILTIN_STOREUPS,
14378 IX86_BUILTIN_MOVSS,
14380 IX86_BUILTIN_MOVHLPS,
14381 IX86_BUILTIN_MOVLHPS,
14382 IX86_BUILTIN_LOADHPS,
14383 IX86_BUILTIN_LOADLPS,
14384 IX86_BUILTIN_STOREHPS,
14385 IX86_BUILTIN_STORELPS,
14387 IX86_BUILTIN_MASKMOVQ,
14388 IX86_BUILTIN_MOVMSKPS,
14389 IX86_BUILTIN_PMOVMSKB,
14391 IX86_BUILTIN_MOVNTPS,
14392 IX86_BUILTIN_MOVNTQ,
14394 IX86_BUILTIN_LOADDQU,
14395 IX86_BUILTIN_STOREDQU,
14397 IX86_BUILTIN_PACKSSWB,
14398 IX86_BUILTIN_PACKSSDW,
14399 IX86_BUILTIN_PACKUSWB,
14401 IX86_BUILTIN_PADDB,
14402 IX86_BUILTIN_PADDW,
14403 IX86_BUILTIN_PADDD,
14404 IX86_BUILTIN_PADDQ,
14405 IX86_BUILTIN_PADDSB,
14406 IX86_BUILTIN_PADDSW,
14407 IX86_BUILTIN_PADDUSB,
14408 IX86_BUILTIN_PADDUSW,
14409 IX86_BUILTIN_PSUBB,
14410 IX86_BUILTIN_PSUBW,
14411 IX86_BUILTIN_PSUBD,
14412 IX86_BUILTIN_PSUBQ,
14413 IX86_BUILTIN_PSUBSB,
14414 IX86_BUILTIN_PSUBSW,
14415 IX86_BUILTIN_PSUBUSB,
14416 IX86_BUILTIN_PSUBUSW,
14419 IX86_BUILTIN_PANDN,
14423 IX86_BUILTIN_PAVGB,
14424 IX86_BUILTIN_PAVGW,
14426 IX86_BUILTIN_PCMPEQB,
14427 IX86_BUILTIN_PCMPEQW,
14428 IX86_BUILTIN_PCMPEQD,
14429 IX86_BUILTIN_PCMPGTB,
14430 IX86_BUILTIN_PCMPGTW,
14431 IX86_BUILTIN_PCMPGTD,
14433 IX86_BUILTIN_PMADDWD,
14435 IX86_BUILTIN_PMAXSW,
14436 IX86_BUILTIN_PMAXUB,
14437 IX86_BUILTIN_PMINSW,
14438 IX86_BUILTIN_PMINUB,
14440 IX86_BUILTIN_PMULHUW,
14441 IX86_BUILTIN_PMULHW,
14442 IX86_BUILTIN_PMULLW,
14444 IX86_BUILTIN_PSADBW,
14445 IX86_BUILTIN_PSHUFW,
14447 IX86_BUILTIN_PSLLW,
14448 IX86_BUILTIN_PSLLD,
14449 IX86_BUILTIN_PSLLQ,
14450 IX86_BUILTIN_PSRAW,
14451 IX86_BUILTIN_PSRAD,
14452 IX86_BUILTIN_PSRLW,
14453 IX86_BUILTIN_PSRLD,
14454 IX86_BUILTIN_PSRLQ,
14455 IX86_BUILTIN_PSLLWI,
14456 IX86_BUILTIN_PSLLDI,
14457 IX86_BUILTIN_PSLLQI,
14458 IX86_BUILTIN_PSRAWI,
14459 IX86_BUILTIN_PSRADI,
14460 IX86_BUILTIN_PSRLWI,
14461 IX86_BUILTIN_PSRLDI,
14462 IX86_BUILTIN_PSRLQI,
14464 IX86_BUILTIN_PUNPCKHBW,
14465 IX86_BUILTIN_PUNPCKHWD,
14466 IX86_BUILTIN_PUNPCKHDQ,
14467 IX86_BUILTIN_PUNPCKLBW,
14468 IX86_BUILTIN_PUNPCKLWD,
14469 IX86_BUILTIN_PUNPCKLDQ,
14471 IX86_BUILTIN_SHUFPS,
14473 IX86_BUILTIN_RCPPS,
14474 IX86_BUILTIN_RCPSS,
14475 IX86_BUILTIN_RSQRTPS,
14476 IX86_BUILTIN_RSQRTSS,
14477 IX86_BUILTIN_SQRTPS,
14478 IX86_BUILTIN_SQRTSS,
14480 IX86_BUILTIN_UNPCKHPS,
14481 IX86_BUILTIN_UNPCKLPS,
14483 IX86_BUILTIN_ANDPS,
14484 IX86_BUILTIN_ANDNPS,
14486 IX86_BUILTIN_XORPS,
14489 IX86_BUILTIN_LDMXCSR,
14490 IX86_BUILTIN_STMXCSR,
14491 IX86_BUILTIN_SFENCE,
14493 /* 3DNow! Original */
14494 IX86_BUILTIN_FEMMS,
14495 IX86_BUILTIN_PAVGUSB,
14496 IX86_BUILTIN_PF2ID,
14497 IX86_BUILTIN_PFACC,
14498 IX86_BUILTIN_PFADD,
14499 IX86_BUILTIN_PFCMPEQ,
14500 IX86_BUILTIN_PFCMPGE,
14501 IX86_BUILTIN_PFCMPGT,
14502 IX86_BUILTIN_PFMAX,
14503 IX86_BUILTIN_PFMIN,
14504 IX86_BUILTIN_PFMUL,
14505 IX86_BUILTIN_PFRCP,
14506 IX86_BUILTIN_PFRCPIT1,
14507 IX86_BUILTIN_PFRCPIT2,
14508 IX86_BUILTIN_PFRSQIT1,
14509 IX86_BUILTIN_PFRSQRT,
14510 IX86_BUILTIN_PFSUB,
14511 IX86_BUILTIN_PFSUBR,
14512 IX86_BUILTIN_PI2FD,
14513 IX86_BUILTIN_PMULHRW,
14515 /* 3DNow! Athlon Extensions */
14516 IX86_BUILTIN_PF2IW,
14517 IX86_BUILTIN_PFNACC,
14518 IX86_BUILTIN_PFPNACC,
14519 IX86_BUILTIN_PI2FW,
14520 IX86_BUILTIN_PSWAPDSI,
14521 IX86_BUILTIN_PSWAPDSF,
14524 IX86_BUILTIN_ADDPD,
14525 IX86_BUILTIN_ADDSD,
14526 IX86_BUILTIN_DIVPD,
14527 IX86_BUILTIN_DIVSD,
14528 IX86_BUILTIN_MULPD,
14529 IX86_BUILTIN_MULSD,
14530 IX86_BUILTIN_SUBPD,
14531 IX86_BUILTIN_SUBSD,
14533 IX86_BUILTIN_CMPEQPD,
14534 IX86_BUILTIN_CMPLTPD,
14535 IX86_BUILTIN_CMPLEPD,
14536 IX86_BUILTIN_CMPGTPD,
14537 IX86_BUILTIN_CMPGEPD,
14538 IX86_BUILTIN_CMPNEQPD,
14539 IX86_BUILTIN_CMPNLTPD,
14540 IX86_BUILTIN_CMPNLEPD,
14541 IX86_BUILTIN_CMPNGTPD,
14542 IX86_BUILTIN_CMPNGEPD,
14543 IX86_BUILTIN_CMPORDPD,
14544 IX86_BUILTIN_CMPUNORDPD,
14545 IX86_BUILTIN_CMPNEPD,
14546 IX86_BUILTIN_CMPEQSD,
14547 IX86_BUILTIN_CMPLTSD,
14548 IX86_BUILTIN_CMPLESD,
14549 IX86_BUILTIN_CMPNEQSD,
14550 IX86_BUILTIN_CMPNLTSD,
14551 IX86_BUILTIN_CMPNLESD,
14552 IX86_BUILTIN_CMPORDSD,
14553 IX86_BUILTIN_CMPUNORDSD,
14554 IX86_BUILTIN_CMPNESD,
14556 IX86_BUILTIN_COMIEQSD,
14557 IX86_BUILTIN_COMILTSD,
14558 IX86_BUILTIN_COMILESD,
14559 IX86_BUILTIN_COMIGTSD,
14560 IX86_BUILTIN_COMIGESD,
14561 IX86_BUILTIN_COMINEQSD,
14562 IX86_BUILTIN_UCOMIEQSD,
14563 IX86_BUILTIN_UCOMILTSD,
14564 IX86_BUILTIN_UCOMILESD,
14565 IX86_BUILTIN_UCOMIGTSD,
14566 IX86_BUILTIN_UCOMIGESD,
14567 IX86_BUILTIN_UCOMINEQSD,
14569 IX86_BUILTIN_MAXPD,
14570 IX86_BUILTIN_MAXSD,
14571 IX86_BUILTIN_MINPD,
14572 IX86_BUILTIN_MINSD,
14574 IX86_BUILTIN_ANDPD,
14575 IX86_BUILTIN_ANDNPD,
14577 IX86_BUILTIN_XORPD,
14579 IX86_BUILTIN_SQRTPD,
14580 IX86_BUILTIN_SQRTSD,
14582 IX86_BUILTIN_UNPCKHPD,
14583 IX86_BUILTIN_UNPCKLPD,
14585 IX86_BUILTIN_SHUFPD,
14587 IX86_BUILTIN_LOADUPD,
14588 IX86_BUILTIN_STOREUPD,
14589 IX86_BUILTIN_MOVSD,
14591 IX86_BUILTIN_LOADHPD,
14592 IX86_BUILTIN_LOADLPD,
14594 IX86_BUILTIN_CVTDQ2PD,
14595 IX86_BUILTIN_CVTDQ2PS,
14597 IX86_BUILTIN_CVTPD2DQ,
14598 IX86_BUILTIN_CVTPD2PI,
14599 IX86_BUILTIN_CVTPD2PS,
14600 IX86_BUILTIN_CVTTPD2DQ,
14601 IX86_BUILTIN_CVTTPD2PI,
14603 IX86_BUILTIN_CVTPI2PD,
14604 IX86_BUILTIN_CVTSI2SD,
14605 IX86_BUILTIN_CVTSI642SD,
14607 IX86_BUILTIN_CVTSD2SI,
14608 IX86_BUILTIN_CVTSD2SI64,
14609 IX86_BUILTIN_CVTSD2SS,
14610 IX86_BUILTIN_CVTSS2SD,
14611 IX86_BUILTIN_CVTTSD2SI,
14612 IX86_BUILTIN_CVTTSD2SI64,
14614 IX86_BUILTIN_CVTPS2DQ,
14615 IX86_BUILTIN_CVTPS2PD,
14616 IX86_BUILTIN_CVTTPS2DQ,
14618 IX86_BUILTIN_MOVNTI,
14619 IX86_BUILTIN_MOVNTPD,
14620 IX86_BUILTIN_MOVNTDQ,
14623 IX86_BUILTIN_MASKMOVDQU,
14624 IX86_BUILTIN_MOVMSKPD,
14625 IX86_BUILTIN_PMOVMSKB128,
14627 IX86_BUILTIN_PACKSSWB128,
14628 IX86_BUILTIN_PACKSSDW128,
14629 IX86_BUILTIN_PACKUSWB128,
14631 IX86_BUILTIN_PADDB128,
14632 IX86_BUILTIN_PADDW128,
14633 IX86_BUILTIN_PADDD128,
14634 IX86_BUILTIN_PADDQ128,
14635 IX86_BUILTIN_PADDSB128,
14636 IX86_BUILTIN_PADDSW128,
14637 IX86_BUILTIN_PADDUSB128,
14638 IX86_BUILTIN_PADDUSW128,
14639 IX86_BUILTIN_PSUBB128,
14640 IX86_BUILTIN_PSUBW128,
14641 IX86_BUILTIN_PSUBD128,
14642 IX86_BUILTIN_PSUBQ128,
14643 IX86_BUILTIN_PSUBSB128,
14644 IX86_BUILTIN_PSUBSW128,
14645 IX86_BUILTIN_PSUBUSB128,
14646 IX86_BUILTIN_PSUBUSW128,
14648 IX86_BUILTIN_PAND128,
14649 IX86_BUILTIN_PANDN128,
14650 IX86_BUILTIN_POR128,
14651 IX86_BUILTIN_PXOR128,
14653 IX86_BUILTIN_PAVGB128,
14654 IX86_BUILTIN_PAVGW128,
14656 IX86_BUILTIN_PCMPEQB128,
14657 IX86_BUILTIN_PCMPEQW128,
14658 IX86_BUILTIN_PCMPEQD128,
14659 IX86_BUILTIN_PCMPGTB128,
14660 IX86_BUILTIN_PCMPGTW128,
14661 IX86_BUILTIN_PCMPGTD128,
14663 IX86_BUILTIN_PMADDWD128,
14665 IX86_BUILTIN_PMAXSW128,
14666 IX86_BUILTIN_PMAXUB128,
14667 IX86_BUILTIN_PMINSW128,
14668 IX86_BUILTIN_PMINUB128,
14670 IX86_BUILTIN_PMULUDQ,
14671 IX86_BUILTIN_PMULUDQ128,
14672 IX86_BUILTIN_PMULHUW128,
14673 IX86_BUILTIN_PMULHW128,
14674 IX86_BUILTIN_PMULLW128,
14676 IX86_BUILTIN_PSADBW128,
14677 IX86_BUILTIN_PSHUFHW,
14678 IX86_BUILTIN_PSHUFLW,
14679 IX86_BUILTIN_PSHUFD,
14681 IX86_BUILTIN_PSLLW128,
14682 IX86_BUILTIN_PSLLD128,
14683 IX86_BUILTIN_PSLLQ128,
14684 IX86_BUILTIN_PSRAW128,
14685 IX86_BUILTIN_PSRAD128,
14686 IX86_BUILTIN_PSRLW128,
14687 IX86_BUILTIN_PSRLD128,
14688 IX86_BUILTIN_PSRLQ128,
14689 IX86_BUILTIN_PSLLDQI128,
14690 IX86_BUILTIN_PSLLWI128,
14691 IX86_BUILTIN_PSLLDI128,
14692 IX86_BUILTIN_PSLLQI128,
14693 IX86_BUILTIN_PSRAWI128,
14694 IX86_BUILTIN_PSRADI128,
14695 IX86_BUILTIN_PSRLDQI128,
14696 IX86_BUILTIN_PSRLWI128,
14697 IX86_BUILTIN_PSRLDI128,
14698 IX86_BUILTIN_PSRLQI128,
14700 IX86_BUILTIN_PUNPCKHBW128,
14701 IX86_BUILTIN_PUNPCKHWD128,
14702 IX86_BUILTIN_PUNPCKHDQ128,
14703 IX86_BUILTIN_PUNPCKHQDQ128,
14704 IX86_BUILTIN_PUNPCKLBW128,
14705 IX86_BUILTIN_PUNPCKLWD128,
14706 IX86_BUILTIN_PUNPCKLDQ128,
14707 IX86_BUILTIN_PUNPCKLQDQ128,
14709 IX86_BUILTIN_CLFLUSH,
14710 IX86_BUILTIN_MFENCE,
14711 IX86_BUILTIN_LFENCE,
14713 /* Prescott New Instructions. */
14714 IX86_BUILTIN_ADDSUBPS,
14715 IX86_BUILTIN_HADDPS,
14716 IX86_BUILTIN_HSUBPS,
14717 IX86_BUILTIN_MOVSHDUP,
14718 IX86_BUILTIN_MOVSLDUP,
14719 IX86_BUILTIN_ADDSUBPD,
14720 IX86_BUILTIN_HADDPD,
14721 IX86_BUILTIN_HSUBPD,
14722 IX86_BUILTIN_LDDQU,
14724 IX86_BUILTIN_MONITOR,
14725 IX86_BUILTIN_MWAIT,
14728 IX86_BUILTIN_PHADDW,
14729 IX86_BUILTIN_PHADDD,
14730 IX86_BUILTIN_PHADDSW,
14731 IX86_BUILTIN_PHSUBW,
14732 IX86_BUILTIN_PHSUBD,
14733 IX86_BUILTIN_PHSUBSW,
14734 IX86_BUILTIN_PMADDUBSW,
14735 IX86_BUILTIN_PMULHRSW,
14736 IX86_BUILTIN_PSHUFB,
14737 IX86_BUILTIN_PSIGNB,
14738 IX86_BUILTIN_PSIGNW,
14739 IX86_BUILTIN_PSIGND,
14740 IX86_BUILTIN_PALIGNR,
14741 IX86_BUILTIN_PABSB,
14742 IX86_BUILTIN_PABSW,
14743 IX86_BUILTIN_PABSD,
14745 IX86_BUILTIN_PHADDW128,
14746 IX86_BUILTIN_PHADDD128,
14747 IX86_BUILTIN_PHADDSW128,
14748 IX86_BUILTIN_PHSUBW128,
14749 IX86_BUILTIN_PHSUBD128,
14750 IX86_BUILTIN_PHSUBSW128,
14751 IX86_BUILTIN_PMADDUBSW128,
14752 IX86_BUILTIN_PMULHRSW128,
14753 IX86_BUILTIN_PSHUFB128,
14754 IX86_BUILTIN_PSIGNB128,
14755 IX86_BUILTIN_PSIGNW128,
14756 IX86_BUILTIN_PSIGND128,
14757 IX86_BUILTIN_PALIGNR128,
14758 IX86_BUILTIN_PABSB128,
14759 IX86_BUILTIN_PABSW128,
14760 IX86_BUILTIN_PABSD128,
14762 IX86_BUILTIN_VEC_INIT_V2SI,
14763 IX86_BUILTIN_VEC_INIT_V4HI,
14764 IX86_BUILTIN_VEC_INIT_V8QI,
14765 IX86_BUILTIN_VEC_EXT_V2DF,
14766 IX86_BUILTIN_VEC_EXT_V2DI,
14767 IX86_BUILTIN_VEC_EXT_V4SF,
14768 IX86_BUILTIN_VEC_EXT_V4SI,
14769 IX86_BUILTIN_VEC_EXT_V8HI,
14770 IX86_BUILTIN_VEC_EXT_V2SI,
14771 IX86_BUILTIN_VEC_EXT_V4HI,
14772 IX86_BUILTIN_VEC_SET_V8HI,
14773 IX86_BUILTIN_VEC_SET_V4HI,
14778 #define def_builtin(MASK, NAME, TYPE, CODE) \
14780 if ((MASK) & target_flags \
14781 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14782 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14783 NULL, NULL_TREE); \
14786 /* Bits for builtin_description.flag. */
14788 /* Set when we don't support the comparison natively, and should
14789 swap_comparison in order to support it. */
14790 #define BUILTIN_DESC_SWAP_OPERANDS 1
14792 struct builtin_description
14794 const unsigned int mask;
14795 const enum insn_code icode;
14796 const char *const name;
14797 const enum ix86_builtins code;
14798 const enum rtx_code comparison;
14799 const unsigned int flag;
14802 static const struct builtin_description bdesc_comi[] =
14804 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14805 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14806 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14807 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14808 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14809 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14810 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14811 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14812 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14813 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14814 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14815 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14817 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14818 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14819 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14820 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14821 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14822 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14823 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14824 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14825 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14826 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14827 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14830 static const struct builtin_description bdesc_2arg[] =
14833 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14834 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14835 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14836 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14837 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14838 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14839 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14840 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14842 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14843 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14844 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14845 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14846 BUILTIN_DESC_SWAP_OPERANDS },
14847 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14848 BUILTIN_DESC_SWAP_OPERANDS },
14849 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14850 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14851 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14852 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14853 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14854 BUILTIN_DESC_SWAP_OPERANDS },
14855 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14856 BUILTIN_DESC_SWAP_OPERANDS },
14857 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14858 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14859 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14860 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14861 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14862 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14863 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14864 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14865 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14866 BUILTIN_DESC_SWAP_OPERANDS },
14867 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14868 BUILTIN_DESC_SWAP_OPERANDS },
14869 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14871 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14872 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14873 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14874 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14876 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14877 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14878 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14879 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14881 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14882 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14883 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14884 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14885 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14888 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14889 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14890 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14891 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14892 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14893 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14894 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14895 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14897 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14898 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14899 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14900 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14901 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14902 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14903 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14904 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14906 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14907 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14908 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14910 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14911 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14912 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14913 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14915 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14916 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14918 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14919 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14920 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14921 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14922 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14923 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14925 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14926 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14927 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14928 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14930 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14931 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14932 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14933 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14934 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14935 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14938 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14939 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14940 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14942 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14943 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14944 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14946 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14947 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14948 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14949 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14950 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14951 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14953 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14954 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14955 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14956 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14957 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14958 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14960 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14961 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14962 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14963 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14965 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14966 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14969 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14970 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14971 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14972 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14973 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14974 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14975 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14976 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14978 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14979 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14980 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14981 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14982 BUILTIN_DESC_SWAP_OPERANDS },
14983 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14984 BUILTIN_DESC_SWAP_OPERANDS },
14985 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14986 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14987 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14988 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14989 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14990 BUILTIN_DESC_SWAP_OPERANDS },
14991 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14992 BUILTIN_DESC_SWAP_OPERANDS },
14993 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14994 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14995 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14996 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14997 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14998 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14999 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15000 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15001 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15003 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15004 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15005 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15006 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15008 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15009 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15010 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15011 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15013 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15014 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15015 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15018 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15019 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15020 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15021 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15022 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15023 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15024 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15025 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15027 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15028 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15029 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15030 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15031 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15032 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15033 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15034 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15036 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15037 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15039 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15040 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15041 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15042 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15044 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15045 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15047 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15048 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15049 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15050 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15051 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15052 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15054 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15055 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15056 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15057 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15059 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15060 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15061 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15062 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15063 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15064 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15065 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15066 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15068 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15069 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15070 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15072 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15073 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15075 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15076 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15078 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15079 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15080 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15082 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15083 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15084 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15086 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15087 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15089 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15091 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15092 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15093 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15094 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15097 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15098 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15099 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15100 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15101 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15102 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15105 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15106 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15107 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15108 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15109 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15110 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15111 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15112 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15113 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15114 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15115 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15116 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15117 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15118 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15119 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15120 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15121 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15122 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15123 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15124 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15125 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15126 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15127 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15128 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15131 static const struct builtin_description bdesc_1arg[] =
15133 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15134 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15136 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15137 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15138 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15140 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15141 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15142 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15143 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15144 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15145 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15147 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15148 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15150 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15152 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15153 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15155 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15156 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15157 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15158 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15159 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15161 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15163 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15164 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15165 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15166 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15168 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15169 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15170 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15173 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15174 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15177 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15178 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15179 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15180 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15181 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15182 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15186 ix86_init_builtins (void)
15189 ix86_init_mmx_sse_builtins ();
15192 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15193 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15196 ix86_init_mmx_sse_builtins (void)
15198 const struct builtin_description * d;
15201 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15202 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15203 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15204 tree V2DI_type_node
15205 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15206 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15207 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15208 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15209 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15210 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15211 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15213 tree pchar_type_node = build_pointer_type (char_type_node);
15214 tree pcchar_type_node = build_pointer_type (
15215 build_type_variant (char_type_node, 1, 0));
15216 tree pfloat_type_node = build_pointer_type (float_type_node);
15217 tree pcfloat_type_node = build_pointer_type (
15218 build_type_variant (float_type_node, 1, 0));
15219 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15220 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15221 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15224 tree int_ftype_v4sf_v4sf
15225 = build_function_type_list (integer_type_node,
15226 V4SF_type_node, V4SF_type_node, NULL_TREE);
15227 tree v4si_ftype_v4sf_v4sf
15228 = build_function_type_list (V4SI_type_node,
15229 V4SF_type_node, V4SF_type_node, NULL_TREE);
15230 /* MMX/SSE/integer conversions. */
15231 tree int_ftype_v4sf
15232 = build_function_type_list (integer_type_node,
15233 V4SF_type_node, NULL_TREE);
15234 tree int64_ftype_v4sf
15235 = build_function_type_list (long_long_integer_type_node,
15236 V4SF_type_node, NULL_TREE);
15237 tree int_ftype_v8qi
15238 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15239 tree v4sf_ftype_v4sf_int
15240 = build_function_type_list (V4SF_type_node,
15241 V4SF_type_node, integer_type_node, NULL_TREE);
15242 tree v4sf_ftype_v4sf_int64
15243 = build_function_type_list (V4SF_type_node,
15244 V4SF_type_node, long_long_integer_type_node,
15246 tree v4sf_ftype_v4sf_v2si
15247 = build_function_type_list (V4SF_type_node,
15248 V4SF_type_node, V2SI_type_node, NULL_TREE);
15250 /* Miscellaneous. */
15251 tree v8qi_ftype_v4hi_v4hi
15252 = build_function_type_list (V8QI_type_node,
15253 V4HI_type_node, V4HI_type_node, NULL_TREE);
15254 tree v4hi_ftype_v2si_v2si
15255 = build_function_type_list (V4HI_type_node,
15256 V2SI_type_node, V2SI_type_node, NULL_TREE);
15257 tree v4sf_ftype_v4sf_v4sf_int
15258 = build_function_type_list (V4SF_type_node,
15259 V4SF_type_node, V4SF_type_node,
15260 integer_type_node, NULL_TREE);
15261 tree v2si_ftype_v4hi_v4hi
15262 = build_function_type_list (V2SI_type_node,
15263 V4HI_type_node, V4HI_type_node, NULL_TREE);
15264 tree v4hi_ftype_v4hi_int
15265 = build_function_type_list (V4HI_type_node,
15266 V4HI_type_node, integer_type_node, NULL_TREE);
15267 tree v4hi_ftype_v4hi_di
15268 = build_function_type_list (V4HI_type_node,
15269 V4HI_type_node, long_long_unsigned_type_node,
15271 tree v2si_ftype_v2si_di
15272 = build_function_type_list (V2SI_type_node,
15273 V2SI_type_node, long_long_unsigned_type_node,
15275 tree void_ftype_void
15276 = build_function_type (void_type_node, void_list_node);
15277 tree void_ftype_unsigned
15278 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15279 tree void_ftype_unsigned_unsigned
15280 = build_function_type_list (void_type_node, unsigned_type_node,
15281 unsigned_type_node, NULL_TREE);
15282 tree void_ftype_pcvoid_unsigned_unsigned
15283 = build_function_type_list (void_type_node, const_ptr_type_node,
15284 unsigned_type_node, unsigned_type_node,
15286 tree unsigned_ftype_void
15287 = build_function_type (unsigned_type_node, void_list_node);
15288 tree v2si_ftype_v4sf
15289 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15290 /* Loads/stores. */
15291 tree void_ftype_v8qi_v8qi_pchar
15292 = build_function_type_list (void_type_node,
15293 V8QI_type_node, V8QI_type_node,
15294 pchar_type_node, NULL_TREE);
15295 tree v4sf_ftype_pcfloat
15296 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15297 /* @@@ the type is bogus */
15298 tree v4sf_ftype_v4sf_pv2si
15299 = build_function_type_list (V4SF_type_node,
15300 V4SF_type_node, pv2si_type_node, NULL_TREE);
15301 tree void_ftype_pv2si_v4sf
15302 = build_function_type_list (void_type_node,
15303 pv2si_type_node, V4SF_type_node, NULL_TREE);
15304 tree void_ftype_pfloat_v4sf
15305 = build_function_type_list (void_type_node,
15306 pfloat_type_node, V4SF_type_node, NULL_TREE);
15307 tree void_ftype_pdi_di
15308 = build_function_type_list (void_type_node,
15309 pdi_type_node, long_long_unsigned_type_node,
15311 tree void_ftype_pv2di_v2di
15312 = build_function_type_list (void_type_node,
15313 pv2di_type_node, V2DI_type_node, NULL_TREE);
15314 /* Normal vector unops. */
15315 tree v4sf_ftype_v4sf
15316 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15317 tree v16qi_ftype_v16qi
15318 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15319 tree v8hi_ftype_v8hi
15320 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15321 tree v4si_ftype_v4si
15322 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15323 tree v8qi_ftype_v8qi
15324 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15325 tree v4hi_ftype_v4hi
15326 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15328 /* Normal vector binops. */
15329 tree v4sf_ftype_v4sf_v4sf
15330 = build_function_type_list (V4SF_type_node,
15331 V4SF_type_node, V4SF_type_node, NULL_TREE);
15332 tree v8qi_ftype_v8qi_v8qi
15333 = build_function_type_list (V8QI_type_node,
15334 V8QI_type_node, V8QI_type_node, NULL_TREE);
15335 tree v4hi_ftype_v4hi_v4hi
15336 = build_function_type_list (V4HI_type_node,
15337 V4HI_type_node, V4HI_type_node, NULL_TREE);
15338 tree v2si_ftype_v2si_v2si
15339 = build_function_type_list (V2SI_type_node,
15340 V2SI_type_node, V2SI_type_node, NULL_TREE);
15341 tree di_ftype_di_di
15342 = build_function_type_list (long_long_unsigned_type_node,
15343 long_long_unsigned_type_node,
15344 long_long_unsigned_type_node, NULL_TREE);
15346 tree di_ftype_di_di_int
15347 = build_function_type_list (long_long_unsigned_type_node,
15348 long_long_unsigned_type_node,
15349 long_long_unsigned_type_node,
15350 integer_type_node, NULL_TREE);
15352 tree v2si_ftype_v2sf
15353 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15354 tree v2sf_ftype_v2si
15355 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15356 tree v2si_ftype_v2si
15357 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15358 tree v2sf_ftype_v2sf
15359 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15360 tree v2sf_ftype_v2sf_v2sf
15361 = build_function_type_list (V2SF_type_node,
15362 V2SF_type_node, V2SF_type_node, NULL_TREE);
15363 tree v2si_ftype_v2sf_v2sf
15364 = build_function_type_list (V2SI_type_node,
15365 V2SF_type_node, V2SF_type_node, NULL_TREE);
15366 tree pint_type_node = build_pointer_type (integer_type_node);
15367 tree pdouble_type_node = build_pointer_type (double_type_node);
15368 tree pcdouble_type_node = build_pointer_type (
15369 build_type_variant (double_type_node, 1, 0));
15370 tree int_ftype_v2df_v2df
15371 = build_function_type_list (integer_type_node,
15372 V2DF_type_node, V2DF_type_node, NULL_TREE);
15374 tree void_ftype_pcvoid
15375 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15376 tree v4sf_ftype_v4si
15377 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15378 tree v4si_ftype_v4sf
15379 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15380 tree v2df_ftype_v4si
15381 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15382 tree v4si_ftype_v2df
15383 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15384 tree v2si_ftype_v2df
15385 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15386 tree v4sf_ftype_v2df
15387 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15388 tree v2df_ftype_v2si
15389 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15390 tree v2df_ftype_v4sf
15391 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15392 tree int_ftype_v2df
15393 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15394 tree int64_ftype_v2df
15395 = build_function_type_list (long_long_integer_type_node,
15396 V2DF_type_node, NULL_TREE);
15397 tree v2df_ftype_v2df_int
15398 = build_function_type_list (V2DF_type_node,
15399 V2DF_type_node, integer_type_node, NULL_TREE);
15400 tree v2df_ftype_v2df_int64
15401 = build_function_type_list (V2DF_type_node,
15402 V2DF_type_node, long_long_integer_type_node,
15404 tree v4sf_ftype_v4sf_v2df
15405 = build_function_type_list (V4SF_type_node,
15406 V4SF_type_node, V2DF_type_node, NULL_TREE);
15407 tree v2df_ftype_v2df_v4sf
15408 = build_function_type_list (V2DF_type_node,
15409 V2DF_type_node, V4SF_type_node, NULL_TREE);
15410 tree v2df_ftype_v2df_v2df_int
15411 = build_function_type_list (V2DF_type_node,
15412 V2DF_type_node, V2DF_type_node,
15415 tree v2df_ftype_v2df_pcdouble
15416 = build_function_type_list (V2DF_type_node,
15417 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15418 tree void_ftype_pdouble_v2df
15419 = build_function_type_list (void_type_node,
15420 pdouble_type_node, V2DF_type_node, NULL_TREE);
15421 tree void_ftype_pint_int
15422 = build_function_type_list (void_type_node,
15423 pint_type_node, integer_type_node, NULL_TREE);
15424 tree void_ftype_v16qi_v16qi_pchar
15425 = build_function_type_list (void_type_node,
15426 V16QI_type_node, V16QI_type_node,
15427 pchar_type_node, NULL_TREE);
15428 tree v2df_ftype_pcdouble
15429 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15430 tree v2df_ftype_v2df_v2df
15431 = build_function_type_list (V2DF_type_node,
15432 V2DF_type_node, V2DF_type_node, NULL_TREE);
15433 tree v16qi_ftype_v16qi_v16qi
15434 = build_function_type_list (V16QI_type_node,
15435 V16QI_type_node, V16QI_type_node, NULL_TREE);
15436 tree v8hi_ftype_v8hi_v8hi
15437 = build_function_type_list (V8HI_type_node,
15438 V8HI_type_node, V8HI_type_node, NULL_TREE);
15439 tree v4si_ftype_v4si_v4si
15440 = build_function_type_list (V4SI_type_node,
15441 V4SI_type_node, V4SI_type_node, NULL_TREE);
15442 tree v2di_ftype_v2di_v2di
15443 = build_function_type_list (V2DI_type_node,
15444 V2DI_type_node, V2DI_type_node, NULL_TREE);
15445 tree v2di_ftype_v2df_v2df
15446 = build_function_type_list (V2DI_type_node,
15447 V2DF_type_node, V2DF_type_node, NULL_TREE);
15448 tree v2df_ftype_v2df
15449 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15450 tree v2di_ftype_v2di_int
15451 = build_function_type_list (V2DI_type_node,
15452 V2DI_type_node, integer_type_node, NULL_TREE);
15453 tree v2di_ftype_v2di_v2di_int
15454 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15455 V2DI_type_node, integer_type_node, NULL_TREE);
15456 tree v4si_ftype_v4si_int
15457 = build_function_type_list (V4SI_type_node,
15458 V4SI_type_node, integer_type_node, NULL_TREE);
15459 tree v8hi_ftype_v8hi_int
15460 = build_function_type_list (V8HI_type_node,
15461 V8HI_type_node, integer_type_node, NULL_TREE);
15462 tree v8hi_ftype_v8hi_v2di
15463 = build_function_type_list (V8HI_type_node,
15464 V8HI_type_node, V2DI_type_node, NULL_TREE);
15465 tree v4si_ftype_v4si_v2di
15466 = build_function_type_list (V4SI_type_node,
15467 V4SI_type_node, V2DI_type_node, NULL_TREE);
15468 tree v4si_ftype_v8hi_v8hi
15469 = build_function_type_list (V4SI_type_node,
15470 V8HI_type_node, V8HI_type_node, NULL_TREE);
15471 tree di_ftype_v8qi_v8qi
15472 = build_function_type_list (long_long_unsigned_type_node,
15473 V8QI_type_node, V8QI_type_node, NULL_TREE);
15474 tree di_ftype_v2si_v2si
15475 = build_function_type_list (long_long_unsigned_type_node,
15476 V2SI_type_node, V2SI_type_node, NULL_TREE);
15477 tree v2di_ftype_v16qi_v16qi
15478 = build_function_type_list (V2DI_type_node,
15479 V16QI_type_node, V16QI_type_node, NULL_TREE);
15480 tree v2di_ftype_v4si_v4si
15481 = build_function_type_list (V2DI_type_node,
15482 V4SI_type_node, V4SI_type_node, NULL_TREE);
15483 tree int_ftype_v16qi
15484 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15485 tree v16qi_ftype_pcchar
15486 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15487 tree void_ftype_pchar_v16qi
15488 = build_function_type_list (void_type_node,
15489 pchar_type_node, V16QI_type_node, NULL_TREE);
15492 tree float128_type;
15495 /* The __float80 type. */
15496 if (TYPE_MODE (long_double_type_node) == XFmode)
15497 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15501 /* The __float80 type. */
15502 float80_type = make_node (REAL_TYPE);
15503 TYPE_PRECISION (float80_type) = 80;
15504 layout_type (float80_type);
15505 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15510 float128_type = make_node (REAL_TYPE);
15511 TYPE_PRECISION (float128_type) = 128;
15512 layout_type (float128_type);
15513 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15516 /* Add all builtins that are more or less simple operations on two
15518 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15520 /* Use one of the operands; the target can have a different mode for
15521 mask-generating compares. */
15522 enum machine_mode mode;
15527 mode = insn_data[d->icode].operand[1].mode;
15532 type = v16qi_ftype_v16qi_v16qi;
15535 type = v8hi_ftype_v8hi_v8hi;
15538 type = v4si_ftype_v4si_v4si;
15541 type = v2di_ftype_v2di_v2di;
15544 type = v2df_ftype_v2df_v2df;
15547 type = v4sf_ftype_v4sf_v4sf;
15550 type = v8qi_ftype_v8qi_v8qi;
15553 type = v4hi_ftype_v4hi_v4hi;
15556 type = v2si_ftype_v2si_v2si;
15559 type = di_ftype_di_di;
15563 gcc_unreachable ();
15566 /* Override for comparisons. */
15567 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15568 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15569 type = v4si_ftype_v4sf_v4sf;
15571 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15572 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15573 type = v2di_ftype_v2df_v2df;
15575 def_builtin (d->mask, d->name, type, d->code);
15578 /* Add all builtins that are more or less simple operations on 1 operand. */
15579 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15581 enum machine_mode mode;
15586 mode = insn_data[d->icode].operand[1].mode;
15591 type = v16qi_ftype_v16qi;
15594 type = v8hi_ftype_v8hi;
15597 type = v4si_ftype_v4si;
15600 type = v2df_ftype_v2df;
15603 type = v4sf_ftype_v4sf;
15606 type = v8qi_ftype_v8qi;
15609 type = v4hi_ftype_v4hi;
15612 type = v2si_ftype_v2si;
15619 def_builtin (d->mask, d->name, type, d->code);
15622 /* Add the remaining MMX insns with somewhat more complicated types. */
15623 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15624 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15625 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15626 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15628 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15629 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15630 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15632 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15633 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15635 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15636 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15638 /* comi/ucomi insns. */
15639 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15640 if (d->mask == MASK_SSE2)
15641 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15643 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15645 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15646 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15647 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15649 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15650 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15651 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15652 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15653 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15654 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15655 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15656 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15657 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15658 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15659 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15661 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15663 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15664 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15666 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15667 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15668 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15669 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15671 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15672 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15673 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15674 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15676 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15678 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15680 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15681 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15682 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15683 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15684 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15685 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15687 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15689 /* Original 3DNow! */
15690 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15691 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15692 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15693 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15694 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15695 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15696 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15697 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15698 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15699 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15700 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15701 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15702 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15703 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15704 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15705 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15706 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15707 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15708 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15709 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15711 /* 3DNow! extension as used in the Athlon CPU. */
15712 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15713 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15714 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15715 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15716 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15717 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15720 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15722 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15723 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15725 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15726 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15728 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15729 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15730 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15731 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15732 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15734 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15735 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15736 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15737 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15739 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15740 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15742 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15744 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15745 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15747 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15748 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15749 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15750 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15751 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15753 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15755 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15756 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15757 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15758 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15760 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15761 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15762 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15764 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15765 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15766 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15767 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15769 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15770 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15771 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15773 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15774 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15776 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15777 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15779 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15780 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15781 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15783 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15784 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15785 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15787 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15788 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15790 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15791 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15792 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15793 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15795 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15796 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15797 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15798 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15800 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15801 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15803 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15805 /* Prescott New Instructions. */
15806 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15807 void_ftype_pcvoid_unsigned_unsigned,
15808 IX86_BUILTIN_MONITOR);
15809 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15810 void_ftype_unsigned_unsigned,
15811 IX86_BUILTIN_MWAIT);
15812 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15814 IX86_BUILTIN_MOVSHDUP);
15815 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15817 IX86_BUILTIN_MOVSLDUP);
15818 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15819 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15822 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15823 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15824 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15825 IX86_BUILTIN_PALIGNR);
15827 /* Access to the vec_init patterns. */
15828 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15829 integer_type_node, NULL_TREE);
15830 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15831 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15833 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15834 short_integer_type_node,
15835 short_integer_type_node,
15836 short_integer_type_node, NULL_TREE);
15837 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15838 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15840 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15841 char_type_node, char_type_node,
15842 char_type_node, char_type_node,
15843 char_type_node, char_type_node,
15844 char_type_node, NULL_TREE);
15845 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15846 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15848 /* Access to the vec_extract patterns. */
15849 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15850 integer_type_node, NULL_TREE);
15851 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15852 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15854 ftype = build_function_type_list (long_long_integer_type_node,
15855 V2DI_type_node, integer_type_node,
15857 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15858 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15860 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15861 integer_type_node, NULL_TREE);
15862 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15863 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15865 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15866 integer_type_node, NULL_TREE);
15867 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15868 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15870 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15871 integer_type_node, NULL_TREE);
15872 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15873 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15875 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15876 integer_type_node, NULL_TREE);
15877 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15878 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15880 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15881 integer_type_node, NULL_TREE);
15882 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15883 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15885 /* Access to the vec_set patterns. */
15886 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15888 integer_type_node, NULL_TREE);
15889 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15890 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15892 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15894 integer_type_node, NULL_TREE);
15895 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15896 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15899 /* Errors in the source file can cause expand_expr to return const0_rtx
15900 where we expect a vector. To avoid crashing, use one of the vector
15901 clear instructions. */
15903 safe_vector_operand (rtx x, enum machine_mode mode)
15905 if (x == const0_rtx)
15906 x = CONST0_RTX (mode);
15910 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15913 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15916 tree arg0 = TREE_VALUE (arglist);
15917 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15918 rtx op0 = expand_normal (arg0);
15919 rtx op1 = expand_normal (arg1);
15920 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15921 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15922 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15924 if (VECTOR_MODE_P (mode0))
15925 op0 = safe_vector_operand (op0, mode0);
15926 if (VECTOR_MODE_P (mode1))
15927 op1 = safe_vector_operand (op1, mode1);
15929 if (optimize || !target
15930 || GET_MODE (target) != tmode
15931 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15932 target = gen_reg_rtx (tmode);
15934 if (GET_MODE (op1) == SImode && mode1 == TImode)
15936 rtx x = gen_reg_rtx (V4SImode);
15937 emit_insn (gen_sse2_loadd (x, op1));
15938 op1 = gen_lowpart (TImode, x);
15941 /* The insn must want input operands in the same modes as the
15943 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15944 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15946 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15947 op0 = copy_to_mode_reg (mode0, op0);
15948 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15949 op1 = copy_to_mode_reg (mode1, op1);
15951 /* ??? Using ix86_fixup_binary_operands is problematic when
15952 we've got mismatched modes. Fake it. */
15958 if (tmode == mode0 && tmode == mode1)
15960 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15964 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15966 op0 = force_reg (mode0, op0);
15967 op1 = force_reg (mode1, op1);
15968 target = gen_reg_rtx (tmode);
15971 pat = GEN_FCN (icode) (target, op0, op1);
15978 /* Subroutine of ix86_expand_builtin to take care of stores. */
15981 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15984 tree arg0 = TREE_VALUE (arglist);
15985 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15986 rtx op0 = expand_normal (arg0);
15987 rtx op1 = expand_normal (arg1);
15988 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15989 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15991 if (VECTOR_MODE_P (mode1))
15992 op1 = safe_vector_operand (op1, mode1);
15994 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15995 op1 = copy_to_mode_reg (mode1, op1);
15997 pat = GEN_FCN (icode) (op0, op1);
16003 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16006 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16007 rtx target, int do_load)
16010 tree arg0 = TREE_VALUE (arglist);
16011 rtx op0 = expand_normal (arg0);
16012 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16013 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16015 if (optimize || !target
16016 || GET_MODE (target) != tmode
16017 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16018 target = gen_reg_rtx (tmode);
16020 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16023 if (VECTOR_MODE_P (mode0))
16024 op0 = safe_vector_operand (op0, mode0);
16026 if ((optimize && !register_operand (op0, mode0))
16027 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16028 op0 = copy_to_mode_reg (mode0, op0);
16031 pat = GEN_FCN (icode) (target, op0);
16038 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16039 sqrtss, rsqrtss, rcpss. */
16042 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16045 tree arg0 = TREE_VALUE (arglist);
16046 rtx op1, op0 = expand_normal (arg0);
16047 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16048 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16050 if (optimize || !target
16051 || GET_MODE (target) != tmode
16052 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16053 target = gen_reg_rtx (tmode);
16055 if (VECTOR_MODE_P (mode0))
16056 op0 = safe_vector_operand (op0, mode0);
16058 if ((optimize && !register_operand (op0, mode0))
16059 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16060 op0 = copy_to_mode_reg (mode0, op0);
16063 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16064 op1 = copy_to_mode_reg (mode0, op1);
16066 pat = GEN_FCN (icode) (target, op0, op1);
16073 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16076 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16080 tree arg0 = TREE_VALUE (arglist);
16081 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16082 rtx op0 = expand_normal (arg0);
16083 rtx op1 = expand_normal (arg1);
16085 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16086 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16087 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16088 enum rtx_code comparison = d->comparison;
16090 if (VECTOR_MODE_P (mode0))
16091 op0 = safe_vector_operand (op0, mode0);
16092 if (VECTOR_MODE_P (mode1))
16093 op1 = safe_vector_operand (op1, mode1);
16095 /* Swap operands if we have a comparison that isn't available in
16097 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16099 rtx tmp = gen_reg_rtx (mode1);
16100 emit_move_insn (tmp, op1);
16105 if (optimize || !target
16106 || GET_MODE (target) != tmode
16107 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16108 target = gen_reg_rtx (tmode);
16110 if ((optimize && !register_operand (op0, mode0))
16111 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16112 op0 = copy_to_mode_reg (mode0, op0);
16113 if ((optimize && !register_operand (op1, mode1))
16114 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16115 op1 = copy_to_mode_reg (mode1, op1);
16117 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16118 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16125 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16128 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16132 tree arg0 = TREE_VALUE (arglist);
16133 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16134 rtx op0 = expand_normal (arg0);
16135 rtx op1 = expand_normal (arg1);
16137 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16138 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16139 enum rtx_code comparison = d->comparison;
16141 if (VECTOR_MODE_P (mode0))
16142 op0 = safe_vector_operand (op0, mode0);
16143 if (VECTOR_MODE_P (mode1))
16144 op1 = safe_vector_operand (op1, mode1);
16146 /* Swap operands if we have a comparison that isn't available in
16148 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16155 target = gen_reg_rtx (SImode);
16156 emit_move_insn (target, const0_rtx);
16157 target = gen_rtx_SUBREG (QImode, target, 0);
16159 if ((optimize && !register_operand (op0, mode0))
16160 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16161 op0 = copy_to_mode_reg (mode0, op0);
16162 if ((optimize && !register_operand (op1, mode1))
16163 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16164 op1 = copy_to_mode_reg (mode1, op1);
16166 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16167 pat = GEN_FCN (d->icode) (op0, op1);
16171 emit_insn (gen_rtx_SET (VOIDmode,
16172 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16173 gen_rtx_fmt_ee (comparison, QImode,
16177 return SUBREG_REG (target);
16180 /* Return the integer constant in ARG. Constrain it to be in the range
16181 of the subparts of VEC_TYPE; issue an error if not. */
16184 get_element_number (tree vec_type, tree arg)
16186 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16188 if (!host_integerp (arg, 1)
16189 || (elt = tree_low_cst (arg, 1), elt > max))
16191 error ("selector must be an integer constant in the range 0..%wi", max);
16198 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16199 ix86_expand_vector_init. We DO have language-level syntax for this, in
16200 the form of (type){ init-list }. Except that since we can't place emms
16201 instructions from inside the compiler, we can't allow the use of MMX
16202 registers unless the user explicitly asks for it. So we do *not* define
16203 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16204 we have builtins invoked by mmintrin.h that gives us license to emit
16205 these sorts of instructions. */
16208 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16210 enum machine_mode tmode = TYPE_MODE (type);
16211 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16212 int i, n_elt = GET_MODE_NUNITS (tmode);
16213 rtvec v = rtvec_alloc (n_elt);
16215 gcc_assert (VECTOR_MODE_P (tmode));
16217 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16219 rtx x = expand_normal (TREE_VALUE (arglist));
16220 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16223 gcc_assert (arglist == NULL);
16225 if (!target || !register_operand (target, tmode))
16226 target = gen_reg_rtx (tmode);
16228 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16232 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16233 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16234 had a language-level syntax for referencing vector elements. */
16237 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16239 enum machine_mode tmode, mode0;
16244 arg0 = TREE_VALUE (arglist);
16245 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16247 op0 = expand_normal (arg0);
16248 elt = get_element_number (TREE_TYPE (arg0), arg1);
16250 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16251 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16252 gcc_assert (VECTOR_MODE_P (mode0));
16254 op0 = force_reg (mode0, op0);
16256 if (optimize || !target || !register_operand (target, tmode))
16257 target = gen_reg_rtx (tmode);
16259 ix86_expand_vector_extract (true, target, op0, elt);
16264 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16265 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16266 a language-level syntax for referencing vector elements. */
16269 ix86_expand_vec_set_builtin (tree arglist)
16271 enum machine_mode tmode, mode1;
16272 tree arg0, arg1, arg2;
16276 arg0 = TREE_VALUE (arglist);
16277 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16278 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16280 tmode = TYPE_MODE (TREE_TYPE (arg0));
16281 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16282 gcc_assert (VECTOR_MODE_P (tmode));
16284 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16285 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16286 elt = get_element_number (TREE_TYPE (arg0), arg2);
16288 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16289 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16291 op0 = force_reg (tmode, op0);
16292 op1 = force_reg (mode1, op1);
16294 ix86_expand_vector_set (true, op0, op1, elt);
16299 /* Expand an expression EXP that calls a built-in function,
16300 with result going to TARGET if that's convenient
16301 (and in mode MODE if that's convenient).
16302 SUBTARGET may be used as the target for computing one of EXP's operands.
16303 IGNORE is nonzero if the value is to be ignored. */
16306 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16307 enum machine_mode mode ATTRIBUTE_UNUSED,
16308 int ignore ATTRIBUTE_UNUSED)
16310 const struct builtin_description *d;
16312 enum insn_code icode;
16313 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16314 tree arglist = TREE_OPERAND (exp, 1);
16315 tree arg0, arg1, arg2;
16316 rtx op0, op1, op2, pat;
16317 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16318 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16322 case IX86_BUILTIN_EMMS:
16323 emit_insn (gen_mmx_emms ());
16326 case IX86_BUILTIN_SFENCE:
16327 emit_insn (gen_sse_sfence ());
16330 case IX86_BUILTIN_MASKMOVQ:
16331 case IX86_BUILTIN_MASKMOVDQU:
16332 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16333 ? CODE_FOR_mmx_maskmovq
16334 : CODE_FOR_sse2_maskmovdqu);
16335 /* Note the arg order is different from the operand order. */
16336 arg1 = TREE_VALUE (arglist);
16337 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16338 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16339 op0 = expand_normal (arg0);
16340 op1 = expand_normal (arg1);
16341 op2 = expand_normal (arg2);
16342 mode0 = insn_data[icode].operand[0].mode;
16343 mode1 = insn_data[icode].operand[1].mode;
16344 mode2 = insn_data[icode].operand[2].mode;
16346 op0 = force_reg (Pmode, op0);
16347 op0 = gen_rtx_MEM (mode1, op0);
16349 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16350 op0 = copy_to_mode_reg (mode0, op0);
16351 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16352 op1 = copy_to_mode_reg (mode1, op1);
16353 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16354 op2 = copy_to_mode_reg (mode2, op2);
16355 pat = GEN_FCN (icode) (op0, op1, op2);
16361 case IX86_BUILTIN_SQRTSS:
16362 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16363 case IX86_BUILTIN_RSQRTSS:
16364 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16365 case IX86_BUILTIN_RCPSS:
16366 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16368 case IX86_BUILTIN_LOADUPS:
16369 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16371 case IX86_BUILTIN_STOREUPS:
16372 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16374 case IX86_BUILTIN_LOADHPS:
16375 case IX86_BUILTIN_LOADLPS:
16376 case IX86_BUILTIN_LOADHPD:
16377 case IX86_BUILTIN_LOADLPD:
16378 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16379 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16380 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16381 : CODE_FOR_sse2_loadlpd);
16382 arg0 = TREE_VALUE (arglist);
16383 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16384 op0 = expand_normal (arg0);
16385 op1 = expand_normal (arg1);
16386 tmode = insn_data[icode].operand[0].mode;
16387 mode0 = insn_data[icode].operand[1].mode;
16388 mode1 = insn_data[icode].operand[2].mode;
16390 op0 = force_reg (mode0, op0);
16391 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16392 if (optimize || target == 0
16393 || GET_MODE (target) != tmode
16394 || !register_operand (target, tmode))
16395 target = gen_reg_rtx (tmode);
16396 pat = GEN_FCN (icode) (target, op0, op1);
16402 case IX86_BUILTIN_STOREHPS:
16403 case IX86_BUILTIN_STORELPS:
16404 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16405 : CODE_FOR_sse_storelps);
16406 arg0 = TREE_VALUE (arglist);
16407 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16408 op0 = expand_normal (arg0);
16409 op1 = expand_normal (arg1);
16410 mode0 = insn_data[icode].operand[0].mode;
16411 mode1 = insn_data[icode].operand[1].mode;
16413 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16414 op1 = force_reg (mode1, op1);
16416 pat = GEN_FCN (icode) (op0, op1);
16422 case IX86_BUILTIN_MOVNTPS:
16423 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16424 case IX86_BUILTIN_MOVNTQ:
16425 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16427 case IX86_BUILTIN_LDMXCSR:
16428 op0 = expand_normal (TREE_VALUE (arglist));
16429 target = assign_386_stack_local (SImode, SLOT_TEMP);
16430 emit_move_insn (target, op0);
16431 emit_insn (gen_sse_ldmxcsr (target));
16434 case IX86_BUILTIN_STMXCSR:
16435 target = assign_386_stack_local (SImode, SLOT_TEMP);
16436 emit_insn (gen_sse_stmxcsr (target));
16437 return copy_to_mode_reg (SImode, target);
16439 case IX86_BUILTIN_SHUFPS:
16440 case IX86_BUILTIN_SHUFPD:
16441 icode = (fcode == IX86_BUILTIN_SHUFPS
16442 ? CODE_FOR_sse_shufps
16443 : CODE_FOR_sse2_shufpd);
16444 arg0 = TREE_VALUE (arglist);
16445 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16446 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16447 op0 = expand_normal (arg0);
16448 op1 = expand_normal (arg1);
16449 op2 = expand_normal (arg2);
16450 tmode = insn_data[icode].operand[0].mode;
16451 mode0 = insn_data[icode].operand[1].mode;
16452 mode1 = insn_data[icode].operand[2].mode;
16453 mode2 = insn_data[icode].operand[3].mode;
16455 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16456 op0 = copy_to_mode_reg (mode0, op0);
16457 if ((optimize && !register_operand (op1, mode1))
16458 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16459 op1 = copy_to_mode_reg (mode1, op1);
16460 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16462 /* @@@ better error message */
16463 error ("mask must be an immediate");
16464 return gen_reg_rtx (tmode);
16466 if (optimize || target == 0
16467 || GET_MODE (target) != tmode
16468 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16469 target = gen_reg_rtx (tmode);
16470 pat = GEN_FCN (icode) (target, op0, op1, op2);
16476 case IX86_BUILTIN_PSHUFW:
16477 case IX86_BUILTIN_PSHUFD:
16478 case IX86_BUILTIN_PSHUFHW:
16479 case IX86_BUILTIN_PSHUFLW:
16480 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16481 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16482 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16483 : CODE_FOR_mmx_pshufw);
16484 arg0 = TREE_VALUE (arglist);
16485 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16486 op0 = expand_normal (arg0);
16487 op1 = expand_normal (arg1);
16488 tmode = insn_data[icode].operand[0].mode;
16489 mode1 = insn_data[icode].operand[1].mode;
16490 mode2 = insn_data[icode].operand[2].mode;
16492 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16493 op0 = copy_to_mode_reg (mode1, op0);
16494 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16496 /* @@@ better error message */
16497 error ("mask must be an immediate");
16501 || GET_MODE (target) != tmode
16502 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16503 target = gen_reg_rtx (tmode);
16504 pat = GEN_FCN (icode) (target, op0, op1);
16510 case IX86_BUILTIN_PSLLDQI128:
16511 case IX86_BUILTIN_PSRLDQI128:
16512 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16513 : CODE_FOR_sse2_lshrti3);
16514 arg0 = TREE_VALUE (arglist);
16515 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16516 op0 = expand_normal (arg0);
16517 op1 = expand_normal (arg1);
16518 tmode = insn_data[icode].operand[0].mode;
16519 mode1 = insn_data[icode].operand[1].mode;
16520 mode2 = insn_data[icode].operand[2].mode;
16522 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16524 op0 = copy_to_reg (op0);
16525 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16527 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16529 error ("shift must be an immediate");
16532 target = gen_reg_rtx (V2DImode);
16533 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16539 case IX86_BUILTIN_FEMMS:
16540 emit_insn (gen_mmx_femms ());
16543 case IX86_BUILTIN_PAVGUSB:
16544 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16546 case IX86_BUILTIN_PF2ID:
16547 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16549 case IX86_BUILTIN_PFACC:
16550 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16552 case IX86_BUILTIN_PFADD:
16553 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16555 case IX86_BUILTIN_PFCMPEQ:
16556 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16558 case IX86_BUILTIN_PFCMPGE:
16559 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16561 case IX86_BUILTIN_PFCMPGT:
16562 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16564 case IX86_BUILTIN_PFMAX:
16565 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16567 case IX86_BUILTIN_PFMIN:
16568 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16570 case IX86_BUILTIN_PFMUL:
16571 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16573 case IX86_BUILTIN_PFRCP:
16574 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16576 case IX86_BUILTIN_PFRCPIT1:
16577 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16579 case IX86_BUILTIN_PFRCPIT2:
16580 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16582 case IX86_BUILTIN_PFRSQIT1:
16583 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16585 case IX86_BUILTIN_PFRSQRT:
16586 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16588 case IX86_BUILTIN_PFSUB:
16589 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16591 case IX86_BUILTIN_PFSUBR:
16592 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16594 case IX86_BUILTIN_PI2FD:
16595 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16597 case IX86_BUILTIN_PMULHRW:
16598 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16600 case IX86_BUILTIN_PF2IW:
16601 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16603 case IX86_BUILTIN_PFNACC:
16604 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16606 case IX86_BUILTIN_PFPNACC:
16607 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16609 case IX86_BUILTIN_PI2FW:
16610 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16612 case IX86_BUILTIN_PSWAPDSI:
16613 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16615 case IX86_BUILTIN_PSWAPDSF:
16616 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16618 case IX86_BUILTIN_SQRTSD:
16619 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16620 case IX86_BUILTIN_LOADUPD:
16621 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16622 case IX86_BUILTIN_STOREUPD:
16623 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16625 case IX86_BUILTIN_MFENCE:
16626 emit_insn (gen_sse2_mfence ());
16628 case IX86_BUILTIN_LFENCE:
16629 emit_insn (gen_sse2_lfence ());
16632 case IX86_BUILTIN_CLFLUSH:
16633 arg0 = TREE_VALUE (arglist);
16634 op0 = expand_normal (arg0);
16635 icode = CODE_FOR_sse2_clflush;
16636 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16637 op0 = copy_to_mode_reg (Pmode, op0);
16639 emit_insn (gen_sse2_clflush (op0));
16642 case IX86_BUILTIN_MOVNTPD:
16643 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16644 case IX86_BUILTIN_MOVNTDQ:
16645 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16646 case IX86_BUILTIN_MOVNTI:
16647 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16649 case IX86_BUILTIN_LOADDQU:
16650 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16651 case IX86_BUILTIN_STOREDQU:
16652 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16654 case IX86_BUILTIN_MONITOR:
16655 arg0 = TREE_VALUE (arglist);
16656 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16657 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16658 op0 = expand_normal (arg0);
16659 op1 = expand_normal (arg1);
16660 op2 = expand_normal (arg2);
16662 op0 = copy_to_mode_reg (Pmode, op0);
16664 op1 = copy_to_mode_reg (SImode, op1);
16666 op2 = copy_to_mode_reg (SImode, op2);
16668 emit_insn (gen_sse3_monitor (op0, op1, op2));
16670 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16673 case IX86_BUILTIN_MWAIT:
16674 arg0 = TREE_VALUE (arglist);
16675 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16676 op0 = expand_normal (arg0);
16677 op1 = expand_normal (arg1);
16679 op0 = copy_to_mode_reg (SImode, op0);
16681 op1 = copy_to_mode_reg (SImode, op1);
16682 emit_insn (gen_sse3_mwait (op0, op1));
16685 case IX86_BUILTIN_LDDQU:
16686 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16689 case IX86_BUILTIN_PALIGNR:
16690 case IX86_BUILTIN_PALIGNR128:
16691 if (fcode == IX86_BUILTIN_PALIGNR)
16693 icode = CODE_FOR_ssse3_palignrdi;
16698 icode = CODE_FOR_ssse3_palignrti;
16701 arg0 = TREE_VALUE (arglist);
16702 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16703 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16704 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16705 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16706 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16707 tmode = insn_data[icode].operand[0].mode;
16708 mode1 = insn_data[icode].operand[1].mode;
16709 mode2 = insn_data[icode].operand[2].mode;
16710 mode3 = insn_data[icode].operand[3].mode;
16712 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16714 op0 = copy_to_reg (op0);
16715 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16717 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16719 op1 = copy_to_reg (op1);
16720 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16722 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16724 error ("shift must be an immediate");
16727 target = gen_reg_rtx (mode);
16728 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16735 case IX86_BUILTIN_VEC_INIT_V2SI:
16736 case IX86_BUILTIN_VEC_INIT_V4HI:
16737 case IX86_BUILTIN_VEC_INIT_V8QI:
16738 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16740 case IX86_BUILTIN_VEC_EXT_V2DF:
16741 case IX86_BUILTIN_VEC_EXT_V2DI:
16742 case IX86_BUILTIN_VEC_EXT_V4SF:
16743 case IX86_BUILTIN_VEC_EXT_V4SI:
16744 case IX86_BUILTIN_VEC_EXT_V8HI:
16745 case IX86_BUILTIN_VEC_EXT_V2SI:
16746 case IX86_BUILTIN_VEC_EXT_V4HI:
16747 return ix86_expand_vec_ext_builtin (arglist, target);
16749 case IX86_BUILTIN_VEC_SET_V8HI:
16750 case IX86_BUILTIN_VEC_SET_V4HI:
16751 return ix86_expand_vec_set_builtin (arglist);
16757 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16758 if (d->code == fcode)
16760 /* Compares are treated specially. */
16761 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16762 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16763 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16764 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16765 return ix86_expand_sse_compare (d, arglist, target);
16767 return ix86_expand_binop_builtin (d->icode, arglist, target);
16770 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16771 if (d->code == fcode)
16772 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16774 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16775 if (d->code == fcode)
16776 return ix86_expand_sse_comi (d, arglist, target);
16778 gcc_unreachable ();
16781 /* Store OPERAND to the memory after reload is completed. This means
16782 that we can't easily use assign_stack_local. */
16784 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16788 gcc_assert (reload_completed);
16789 if (TARGET_RED_ZONE)
16791 result = gen_rtx_MEM (mode,
16792 gen_rtx_PLUS (Pmode,
16794 GEN_INT (-RED_ZONE_SIZE)));
16795 emit_move_insn (result, operand);
16797 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16803 operand = gen_lowpart (DImode, operand);
16807 gen_rtx_SET (VOIDmode,
16808 gen_rtx_MEM (DImode,
16809 gen_rtx_PRE_DEC (DImode,
16810 stack_pointer_rtx)),
16814 gcc_unreachable ();
16816 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16825 split_di (&operand, 1, operands, operands + 1);
16827 gen_rtx_SET (VOIDmode,
16828 gen_rtx_MEM (SImode,
16829 gen_rtx_PRE_DEC (Pmode,
16830 stack_pointer_rtx)),
16833 gen_rtx_SET (VOIDmode,
16834 gen_rtx_MEM (SImode,
16835 gen_rtx_PRE_DEC (Pmode,
16836 stack_pointer_rtx)),
16841 /* Store HImodes as SImodes. */
16842 operand = gen_lowpart (SImode, operand);
16846 gen_rtx_SET (VOIDmode,
16847 gen_rtx_MEM (GET_MODE (operand),
16848 gen_rtx_PRE_DEC (SImode,
16849 stack_pointer_rtx)),
16853 gcc_unreachable ();
16855 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16860 /* Free operand from the memory. */
16862 ix86_free_from_memory (enum machine_mode mode)
16864 if (!TARGET_RED_ZONE)
16868 if (mode == DImode || TARGET_64BIT)
16872 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16873 to pop or add instruction if registers are available. */
16874 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16875 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16880 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16881 QImode must go into class Q_REGS.
16882 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16883 movdf to do mem-to-mem moves through integer regs. */
16885 ix86_preferred_reload_class (rtx x, enum reg_class class)
16887 enum machine_mode mode = GET_MODE (x);
16889 /* We're only allowed to return a subclass of CLASS. Many of the
16890 following checks fail for NO_REGS, so eliminate that early. */
16891 if (class == NO_REGS)
16894 /* All classes can load zeros. */
16895 if (x == CONST0_RTX (mode))
16898 /* Force constants into memory if we are loading a (nonzero) constant into
16899 an MMX or SSE register. This is because there are no MMX/SSE instructions
16900 to load from a constant. */
16902 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16905 /* Prefer SSE regs only, if we can use them for math. */
16906 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16907 return SSE_CLASS_P (class) ? class : NO_REGS;
16909 /* Floating-point constants need more complex checks. */
16910 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16912 /* General regs can load everything. */
16913 if (reg_class_subset_p (class, GENERAL_REGS))
16916 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16917 zero above. We only want to wind up preferring 80387 registers if
16918 we plan on doing computation with them. */
16920 && standard_80387_constant_p (x))
16922 /* Limit class to non-sse. */
16923 if (class == FLOAT_SSE_REGS)
16925 if (class == FP_TOP_SSE_REGS)
16927 if (class == FP_SECOND_SSE_REGS)
16928 return FP_SECOND_REG;
16929 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16936 /* Generally when we see PLUS here, it's the function invariant
16937 (plus soft-fp const_int). Which can only be computed into general
16939 if (GET_CODE (x) == PLUS)
16940 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16942 /* QImode constants are easy to load, but non-constant QImode data
16943 must go into Q_REGS. */
16944 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16946 if (reg_class_subset_p (class, Q_REGS))
16948 if (reg_class_subset_p (Q_REGS, class))
16956 /* Discourage putting floating-point values in SSE registers unless
16957 SSE math is being used, and likewise for the 387 registers. */
16959 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16961 enum machine_mode mode = GET_MODE (x);
16963 /* Restrict the output reload class to the register bank that we are doing
16964 math on. If we would like not to return a subset of CLASS, reject this
16965 alternative: if reload cannot do this, it will still use its choice. */
16966 mode = GET_MODE (x);
16967 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16968 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16970 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16972 if (class == FP_TOP_SSE_REGS)
16974 else if (class == FP_SECOND_SSE_REGS)
16975 return FP_SECOND_REG;
16977 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16983 /* If we are copying between general and FP registers, we need a memory
16984 location. The same is true for SSE and MMX registers.
16986 The macro can't work reliably when one of the CLASSES is class containing
16987 registers from multiple units (SSE, MMX, integer). We avoid this by never
16988 combining those units in single alternative in the machine description.
16989 Ensure that this constraint holds to avoid unexpected surprises.
16991 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16992 enforce these sanity checks. */
16995 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16996 enum machine_mode mode, int strict)
16998 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16999 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17000 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17001 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17002 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17003 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17005 gcc_assert (!strict);
17009 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17012 /* ??? This is a lie. We do have moves between mmx/general, and for
17013 mmx/sse2. But by saying we need secondary memory we discourage the
17014 register allocator from using the mmx registers unless needed. */
17015 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17018 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17020 /* SSE1 doesn't have any direct moves from other classes. */
17024 /* If the target says that inter-unit moves are more expensive
17025 than moving through memory, then don't generate them. */
17026 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17029 /* Between SSE and general, we have moves no larger than word size. */
17030 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17033 /* ??? For the cost of one register reformat penalty, we could use
17034 the same instructions to move SFmode and DFmode data, but the
17035 relevant move patterns don't support those alternatives. */
17036 if (mode == SFmode || mode == DFmode)
17043 /* Return true if the registers in CLASS cannot represent the change from
17044 modes FROM to TO. */
17047 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17048 enum reg_class class)
17053 /* x87 registers can't do subreg at all, as all values are reformatted
17054 to extended precision. */
17055 if (MAYBE_FLOAT_CLASS_P (class))
17058 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17060 /* Vector registers do not support QI or HImode loads. If we don't
17061 disallow a change to these modes, reload will assume it's ok to
17062 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17063 the vec_dupv4hi pattern. */
17064 if (GET_MODE_SIZE (from) < 4)
17067 /* Vector registers do not support subreg with nonzero offsets, which
17068 are otherwise valid for integer registers. Since we can't see
17069 whether we have a nonzero offset from here, prohibit all
17070 nonparadoxical subregs changing size. */
17071 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17078 /* Return the cost of moving data from a register in class CLASS1 to
17079 one in class CLASS2.
17081 It is not required that the cost always equal 2 when FROM is the same as TO;
17082 on some machines it is expensive to move between registers if they are not
17083 general registers. */
17086 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17087 enum reg_class class2)
17089 /* In case we require secondary memory, compute cost of the store followed
17090 by load. In order to avoid bad register allocation choices, we need
17091 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17093 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17097 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17098 MEMORY_MOVE_COST (mode, class1, 1));
17099 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17100 MEMORY_MOVE_COST (mode, class2, 1));
17102 /* In case of copying from general_purpose_register we may emit multiple
17103 stores followed by single load causing memory size mismatch stall.
17104 Count this as arbitrarily high cost of 20. */
17105 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17108 /* In the case of FP/MMX moves, the registers actually overlap, and we
17109 have to switch modes in order to treat them differently. */
17110 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17111 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17117 /* Moves between SSE/MMX and integer unit are expensive. */
17118 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17119 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17120 return ix86_cost->mmxsse_to_integer;
17121 if (MAYBE_FLOAT_CLASS_P (class1))
17122 return ix86_cost->fp_move;
17123 if (MAYBE_SSE_CLASS_P (class1))
17124 return ix86_cost->sse_move;
17125 if (MAYBE_MMX_CLASS_P (class1))
17126 return ix86_cost->mmx_move;
17130 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17133 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17135 /* Flags and only flags can only hold CCmode values. */
17136 if (CC_REGNO_P (regno))
17137 return GET_MODE_CLASS (mode) == MODE_CC;
17138 if (GET_MODE_CLASS (mode) == MODE_CC
17139 || GET_MODE_CLASS (mode) == MODE_RANDOM
17140 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17142 if (FP_REGNO_P (regno))
17143 return VALID_FP_MODE_P (mode);
17144 if (SSE_REGNO_P (regno))
17146 /* We implement the move patterns for all vector modes into and
17147 out of SSE registers, even when no operation instructions
17149 return (VALID_SSE_REG_MODE (mode)
17150 || VALID_SSE2_REG_MODE (mode)
17151 || VALID_MMX_REG_MODE (mode)
17152 || VALID_MMX_REG_MODE_3DNOW (mode));
17154 if (MMX_REGNO_P (regno))
17156 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17157 so if the register is available at all, then we can move data of
17158 the given mode into or out of it. */
17159 return (VALID_MMX_REG_MODE (mode)
17160 || VALID_MMX_REG_MODE_3DNOW (mode));
17163 if (mode == QImode)
17165 /* Take care for QImode values - they can be in non-QI regs,
17166 but then they do cause partial register stalls. */
17167 if (regno < 4 || TARGET_64BIT)
17169 if (!TARGET_PARTIAL_REG_STALL)
17171 return reload_in_progress || reload_completed;
17173 /* We handle both integer and floats in the general purpose registers. */
17174 else if (VALID_INT_MODE_P (mode))
17176 else if (VALID_FP_MODE_P (mode))
17178 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17179 on to use that value in smaller contexts, this can easily force a
17180 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17181 supporting DImode, allow it. */
17182 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17188 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17189 tieable integer mode. */
17192 ix86_tieable_integer_mode_p (enum machine_mode mode)
17201 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17204 return TARGET_64BIT;
17211 /* Return true if MODE1 is accessible in a register that can hold MODE2
17212 without copying. That is, all register classes that can hold MODE2
17213 can also hold MODE1. */
17216 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17218 if (mode1 == mode2)
17221 if (ix86_tieable_integer_mode_p (mode1)
17222 && ix86_tieable_integer_mode_p (mode2))
17225 /* MODE2 being XFmode implies fp stack or general regs, which means we
17226 can tie any smaller floating point modes to it. Note that we do not
17227 tie this with TFmode. */
17228 if (mode2 == XFmode)
17229 return mode1 == SFmode || mode1 == DFmode;
17231 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17232 that we can tie it with SFmode. */
17233 if (mode2 == DFmode)
17234 return mode1 == SFmode;
17236 /* If MODE2 is only appropriate for an SSE register, then tie with
17237 any other mode acceptable to SSE registers. */
17238 if (GET_MODE_SIZE (mode2) >= 8
17239 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17240 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17242 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17243 with any other mode acceptable to MMX registers. */
17244 if (GET_MODE_SIZE (mode2) == 8
17245 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17246 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17251 /* Return the cost of moving data of mode M between a
17252 register and memory. A value of 2 is the default; this cost is
17253 relative to those in `REGISTER_MOVE_COST'.
17255 If moving between registers and memory is more expensive than
17256 between two registers, you should define this macro to express the
17259 Model also increased moving costs of QImode registers in non
17263 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17265 if (FLOAT_CLASS_P (class))
17282 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17284 if (SSE_CLASS_P (class))
17287 switch (GET_MODE_SIZE (mode))
17301 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17303 if (MMX_CLASS_P (class))
17306 switch (GET_MODE_SIZE (mode))
17317 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17319 switch (GET_MODE_SIZE (mode))
17323 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17324 : ix86_cost->movzbl_load);
17326 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17327 : ix86_cost->int_store[0] + 4);
17330 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17332 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17333 if (mode == TFmode)
17335 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17336 * (((int) GET_MODE_SIZE (mode)
17337 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17341 /* Compute a (partial) cost for rtx X. Return true if the complete
17342 cost has been computed, and false if subexpressions should be
17343 scanned. In either case, *TOTAL contains the cost result. */
17346 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17348 enum machine_mode mode = GET_MODE (x);
17356 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17358 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17360 else if (flag_pic && SYMBOLIC_CONST (x)
17362 || (!GET_CODE (x) != LABEL_REF
17363 && (GET_CODE (x) != SYMBOL_REF
17364 || !SYMBOL_REF_LOCAL_P (x)))))
17371 if (mode == VOIDmode)
17374 switch (standard_80387_constant_p (x))
17379 default: /* Other constants */
17384 /* Start with (MEM (SYMBOL_REF)), since that's where
17385 it'll probably end up. Add a penalty for size. */
17386 *total = (COSTS_N_INSNS (1)
17387 + (flag_pic != 0 && !TARGET_64BIT)
17388 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17394 /* The zero extensions is often completely free on x86_64, so make
17395 it as cheap as possible. */
17396 if (TARGET_64BIT && mode == DImode
17397 && GET_MODE (XEXP (x, 0)) == SImode)
17399 else if (TARGET_ZERO_EXTEND_WITH_AND)
17400 *total = ix86_cost->add;
17402 *total = ix86_cost->movzx;
17406 *total = ix86_cost->movsx;
17410 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17411 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17413 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17416 *total = ix86_cost->add;
17419 if ((value == 2 || value == 3)
17420 && ix86_cost->lea <= ix86_cost->shift_const)
17422 *total = ix86_cost->lea;
17432 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17434 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17436 if (INTVAL (XEXP (x, 1)) > 32)
17437 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17439 *total = ix86_cost->shift_const * 2;
17443 if (GET_CODE (XEXP (x, 1)) == AND)
17444 *total = ix86_cost->shift_var * 2;
17446 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17451 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17452 *total = ix86_cost->shift_const;
17454 *total = ix86_cost->shift_var;
17459 if (FLOAT_MODE_P (mode))
17461 *total = ix86_cost->fmul;
17466 rtx op0 = XEXP (x, 0);
17467 rtx op1 = XEXP (x, 1);
17469 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17471 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17472 for (nbits = 0; value != 0; value &= value - 1)
17476 /* This is arbitrary. */
17479 /* Compute costs correctly for widening multiplication. */
17480 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17481 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17482 == GET_MODE_SIZE (mode))
17484 int is_mulwiden = 0;
17485 enum machine_mode inner_mode = GET_MODE (op0);
17487 if (GET_CODE (op0) == GET_CODE (op1))
17488 is_mulwiden = 1, op1 = XEXP (op1, 0);
17489 else if (GET_CODE (op1) == CONST_INT)
17491 if (GET_CODE (op0) == SIGN_EXTEND)
17492 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17495 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17499 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17502 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17503 + nbits * ix86_cost->mult_bit
17504 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17513 if (FLOAT_MODE_P (mode))
17514 *total = ix86_cost->fdiv;
17516 *total = ix86_cost->divide[MODE_INDEX (mode)];
17520 if (FLOAT_MODE_P (mode))
17521 *total = ix86_cost->fadd;
17522 else if (GET_MODE_CLASS (mode) == MODE_INT
17523 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17525 if (GET_CODE (XEXP (x, 0)) == PLUS
17526 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17527 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17528 && CONSTANT_P (XEXP (x, 1)))
17530 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17531 if (val == 2 || val == 4 || val == 8)
17533 *total = ix86_cost->lea;
17534 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17535 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17537 *total += rtx_cost (XEXP (x, 1), outer_code);
17541 else if (GET_CODE (XEXP (x, 0)) == MULT
17542 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17544 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17545 if (val == 2 || val == 4 || val == 8)
17547 *total = ix86_cost->lea;
17548 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17549 *total += rtx_cost (XEXP (x, 1), outer_code);
17553 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17555 *total = ix86_cost->lea;
17556 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17557 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17558 *total += rtx_cost (XEXP (x, 1), outer_code);
17565 if (FLOAT_MODE_P (mode))
17567 *total = ix86_cost->fadd;
17575 if (!TARGET_64BIT && mode == DImode)
17577 *total = (ix86_cost->add * 2
17578 + (rtx_cost (XEXP (x, 0), outer_code)
17579 << (GET_MODE (XEXP (x, 0)) != DImode))
17580 + (rtx_cost (XEXP (x, 1), outer_code)
17581 << (GET_MODE (XEXP (x, 1)) != DImode)));
17587 if (FLOAT_MODE_P (mode))
17589 *total = ix86_cost->fchs;
17595 if (!TARGET_64BIT && mode == DImode)
17596 *total = ix86_cost->add * 2;
17598 *total = ix86_cost->add;
17602 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17603 && XEXP (XEXP (x, 0), 1) == const1_rtx
17604 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17605 && XEXP (x, 1) == const0_rtx)
17607 /* This kind of construct is implemented using test[bwl].
17608 Treat it as if we had an AND. */
17609 *total = (ix86_cost->add
17610 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17611 + rtx_cost (const1_rtx, outer_code));
17617 if (!TARGET_SSE_MATH
17619 || (mode == DFmode && !TARGET_SSE2))
17620 /* For standard 80387 constants, raise the cost to prevent
17621 compress_float_constant() to generate load from memory. */
17622 switch (standard_80387_constant_p (XEXP (x, 0)))
17632 *total = (x86_ext_80387_constants & TUNEMASK
17639 if (FLOAT_MODE_P (mode))
17640 *total = ix86_cost->fabs;
17644 if (FLOAT_MODE_P (mode))
17645 *total = ix86_cost->fsqrt;
17649 if (XINT (x, 1) == UNSPEC_TP)
17660 static int current_machopic_label_num;
17662 /* Given a symbol name and its associated stub, write out the
17663 definition of the stub. */
17666 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17668 unsigned int length;
17669 char *binder_name, *symbol_name, lazy_ptr_name[32];
17670 int label = ++current_machopic_label_num;
17672 /* For 64-bit we shouldn't get here. */
17673 gcc_assert (!TARGET_64BIT);
17675 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17676 symb = (*targetm.strip_name_encoding) (symb);
17678 length = strlen (stub);
17679 binder_name = alloca (length + 32);
17680 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17682 length = strlen (symb);
17683 symbol_name = alloca (length + 32);
17684 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17686 sprintf (lazy_ptr_name, "L%d$lz", label);
17689 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17691 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17693 fprintf (file, "%s:\n", stub);
17694 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17698 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17699 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17700 fprintf (file, "\tjmp\t*%%edx\n");
17703 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17705 fprintf (file, "%s:\n", binder_name);
17709 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17710 fprintf (file, "\tpushl\t%%eax\n");
17713 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17715 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17717 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17718 fprintf (file, "%s:\n", lazy_ptr_name);
17719 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17720 fprintf (file, "\t.long %s\n", binder_name);
17724 darwin_x86_file_end (void)
17726 darwin_file_end ();
17729 #endif /* TARGET_MACHO */
17731 /* Order the registers for register allocator. */
17734 x86_order_regs_for_local_alloc (void)
17739 /* First allocate the local general purpose registers. */
17740 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17741 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17742 reg_alloc_order [pos++] = i;
17744 /* Global general purpose registers. */
17745 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17746 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17747 reg_alloc_order [pos++] = i;
17749 /* x87 registers come first in case we are doing FP math
17751 if (!TARGET_SSE_MATH)
17752 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17753 reg_alloc_order [pos++] = i;
17755 /* SSE registers. */
17756 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17757 reg_alloc_order [pos++] = i;
17758 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17759 reg_alloc_order [pos++] = i;
17761 /* x87 registers. */
17762 if (TARGET_SSE_MATH)
17763 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17764 reg_alloc_order [pos++] = i;
17766 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17767 reg_alloc_order [pos++] = i;
17769 /* Initialize the rest of array as we do not allocate some registers
17771 while (pos < FIRST_PSEUDO_REGISTER)
17772 reg_alloc_order [pos++] = 0;
17775 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17776 struct attribute_spec.handler. */
17778 ix86_handle_struct_attribute (tree *node, tree name,
17779 tree args ATTRIBUTE_UNUSED,
17780 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17783 if (DECL_P (*node))
17785 if (TREE_CODE (*node) == TYPE_DECL)
17786 type = &TREE_TYPE (*node);
17791 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17792 || TREE_CODE (*type) == UNION_TYPE)))
17794 warning (OPT_Wattributes, "%qs attribute ignored",
17795 IDENTIFIER_POINTER (name));
17796 *no_add_attrs = true;
17799 else if ((is_attribute_p ("ms_struct", name)
17800 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17801 || ((is_attribute_p ("gcc_struct", name)
17802 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17804 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17805 IDENTIFIER_POINTER (name));
17806 *no_add_attrs = true;
17813 ix86_ms_bitfield_layout_p (tree record_type)
17815 return (TARGET_MS_BITFIELD_LAYOUT &&
17816 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17817 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17820 /* Returns an expression indicating where the this parameter is
17821 located on entry to the FUNCTION. */
17824 x86_this_parameter (tree function)
17826 tree type = TREE_TYPE (function);
17830 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17831 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17834 if (ix86_function_regparm (type, function) > 0)
17838 parm = TYPE_ARG_TYPES (type);
17839 /* Figure out whether or not the function has a variable number of
17841 for (; parm; parm = TREE_CHAIN (parm))
17842 if (TREE_VALUE (parm) == void_type_node)
17844 /* If not, the this parameter is in the first argument. */
17848 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17850 return gen_rtx_REG (SImode, regno);
17854 if (aggregate_value_p (TREE_TYPE (type), type))
17855 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17857 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17860 /* Determine whether x86_output_mi_thunk can succeed. */
17863 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17864 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17865 HOST_WIDE_INT vcall_offset, tree function)
17867 /* 64-bit can handle anything. */
17871 /* For 32-bit, everything's fine if we have one free register. */
17872 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17875 /* Need a free register for vcall_offset. */
17879 /* Need a free register for GOT references. */
17880 if (flag_pic && !(*targetm.binds_local_p) (function))
17883 /* Otherwise ok. */
17887 /* Output the assembler code for a thunk function. THUNK_DECL is the
17888 declaration for the thunk function itself, FUNCTION is the decl for
17889 the target function. DELTA is an immediate constant offset to be
17890 added to THIS. If VCALL_OFFSET is nonzero, the word at
17891 *(*this + vcall_offset) should be added to THIS. */
17894 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17895 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17896 HOST_WIDE_INT vcall_offset, tree function)
17899 rtx this = x86_this_parameter (function);
17902 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17903 pull it in now and let DELTA benefit. */
17906 else if (vcall_offset)
17908 /* Put the this parameter into %eax. */
17910 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17911 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17914 this_reg = NULL_RTX;
17916 /* Adjust the this parameter by a fixed constant. */
17919 xops[0] = GEN_INT (delta);
17920 xops[1] = this_reg ? this_reg : this;
17923 if (!x86_64_general_operand (xops[0], DImode))
17925 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17927 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17931 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17934 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17937 /* Adjust the this parameter by a value stored in the vtable. */
17941 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17944 int tmp_regno = 2 /* ECX */;
17945 if (lookup_attribute ("fastcall",
17946 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17947 tmp_regno = 0 /* EAX */;
17948 tmp = gen_rtx_REG (SImode, tmp_regno);
17951 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17954 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17956 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17958 /* Adjust the this parameter. */
17959 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17960 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17962 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17963 xops[0] = GEN_INT (vcall_offset);
17965 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17966 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17968 xops[1] = this_reg;
17970 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17972 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17975 /* If necessary, drop THIS back to its stack slot. */
17976 if (this_reg && this_reg != this)
17978 xops[0] = this_reg;
17980 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17983 xops[0] = XEXP (DECL_RTL (function), 0);
17986 if (!flag_pic || (*targetm.binds_local_p) (function))
17987 output_asm_insn ("jmp\t%P0", xops);
17990 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17991 tmp = gen_rtx_CONST (Pmode, tmp);
17992 tmp = gen_rtx_MEM (QImode, tmp);
17994 output_asm_insn ("jmp\t%A0", xops);
17999 if (!flag_pic || (*targetm.binds_local_p) (function))
18000 output_asm_insn ("jmp\t%P0", xops);
18005 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18006 tmp = (gen_rtx_SYMBOL_REF
18008 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18009 tmp = gen_rtx_MEM (QImode, tmp);
18011 output_asm_insn ("jmp\t%0", xops);
18014 #endif /* TARGET_MACHO */
18016 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18017 output_set_got (tmp, NULL_RTX);
18020 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18021 output_asm_insn ("jmp\t{*}%1", xops);
18027 x86_file_start (void)
18029 default_file_start ();
18031 darwin_file_start ();
18033 if (X86_FILE_START_VERSION_DIRECTIVE)
18034 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18035 if (X86_FILE_START_FLTUSED)
18036 fputs ("\t.global\t__fltused\n", asm_out_file);
18037 if (ix86_asm_dialect == ASM_INTEL)
18038 fputs ("\t.intel_syntax\n", asm_out_file);
18042 x86_field_alignment (tree field, int computed)
18044 enum machine_mode mode;
18045 tree type = TREE_TYPE (field);
18047 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18049 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18050 ? get_inner_array_type (type) : type);
18051 if (mode == DFmode || mode == DCmode
18052 || GET_MODE_CLASS (mode) == MODE_INT
18053 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18054 return MIN (32, computed);
18058 /* Output assembler code to FILE to increment profiler label # LABELNO
18059 for profiling a function entry. */
18061 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18066 #ifndef NO_PROFILE_COUNTERS
18067 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18069 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18073 #ifndef NO_PROFILE_COUNTERS
18074 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18076 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18080 #ifndef NO_PROFILE_COUNTERS
18081 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18082 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18084 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18088 #ifndef NO_PROFILE_COUNTERS
18089 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18090 PROFILE_COUNT_REGISTER);
18092 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18096 /* We don't have exact information about the insn sizes, but we may assume
18097 quite safely that we are informed about all 1 byte insns and memory
18098 address sizes. This is enough to eliminate unnecessary padding in
18102 min_insn_size (rtx insn)
18106 if (!INSN_P (insn) || !active_insn_p (insn))
18109 /* Discard alignments we've emit and jump instructions. */
18110 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18111 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18113 if (GET_CODE (insn) == JUMP_INSN
18114 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18115 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18118 /* Important case - calls are always 5 bytes.
18119 It is common to have many calls in the row. */
18120 if (GET_CODE (insn) == CALL_INSN
18121 && symbolic_reference_mentioned_p (PATTERN (insn))
18122 && !SIBLING_CALL_P (insn))
18124 if (get_attr_length (insn) <= 1)
18127 /* For normal instructions we may rely on the sizes of addresses
18128 and the presence of symbol to require 4 bytes of encoding.
18129 This is not the case for jumps where references are PC relative. */
18130 if (GET_CODE (insn) != JUMP_INSN)
18132 l = get_attr_length_address (insn);
18133 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18142 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18146 ix86_avoid_jump_misspredicts (void)
18148 rtx insn, start = get_insns ();
18149 int nbytes = 0, njumps = 0;
18152 /* Look for all minimal intervals of instructions containing 4 jumps.
18153 The intervals are bounded by START and INSN. NBYTES is the total
18154 size of instructions in the interval including INSN and not including
18155 START. When the NBYTES is smaller than 16 bytes, it is possible
18156 that the end of START and INSN ends up in the same 16byte page.
18158 The smallest offset in the page INSN can start is the case where START
18159 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
18160 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
18162 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18165 nbytes += min_insn_size (insn);
18167 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18168 INSN_UID (insn), min_insn_size (insn));
18169 if ((GET_CODE (insn) == JUMP_INSN
18170 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18171 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18172 || GET_CODE (insn) == CALL_INSN)
18179 start = NEXT_INSN (start);
18180 if ((GET_CODE (start) == JUMP_INSN
18181 && GET_CODE (PATTERN (start)) != ADDR_VEC
18182 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18183 || GET_CODE (start) == CALL_INSN)
18184 njumps--, isjump = 1;
18187 nbytes -= min_insn_size (start);
18189 gcc_assert (njumps >= 0);
18191 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18192 INSN_UID (start), INSN_UID (insn), nbytes);
18194 if (njumps == 3 && isjump && nbytes < 16)
18196 int padsize = 15 - nbytes + min_insn_size (insn);
18199 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18200 INSN_UID (insn), padsize);
18201 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18206 /* AMD Athlon works faster
18207 when RET is not destination of conditional jump or directly preceded
18208 by other jump instruction. We avoid the penalty by inserting NOP just
18209 before the RET instructions in such cases. */
18211 ix86_pad_returns (void)
18216 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18218 basic_block bb = e->src;
18219 rtx ret = BB_END (bb);
18221 bool replace = false;
18223 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18224 || !maybe_hot_bb_p (bb))
18226 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18227 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18229 if (prev && GET_CODE (prev) == CODE_LABEL)
18234 FOR_EACH_EDGE (e, ei, bb->preds)
18235 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18236 && !(e->flags & EDGE_FALLTHRU))
18241 prev = prev_active_insn (ret);
18243 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18244 || GET_CODE (prev) == CALL_INSN))
18246 /* Empty functions get branch mispredict even when the jump destination
18247 is not visible to us. */
18248 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18253 emit_insn_before (gen_return_internal_long (), ret);
18259 /* Implement machine specific optimizations. We implement padding of returns
18260 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18264 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18265 ix86_pad_returns ();
18266 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18267 ix86_avoid_jump_misspredicts ();
18270 /* Return nonzero when QImode register that must be represented via REX prefix
18273 x86_extended_QIreg_mentioned_p (rtx insn)
18276 extract_insn_cached (insn);
18277 for (i = 0; i < recog_data.n_operands; i++)
18278 if (REG_P (recog_data.operand[i])
18279 && REGNO (recog_data.operand[i]) >= 4)
18284 /* Return nonzero when P points to register encoded via REX prefix.
18285 Called via for_each_rtx. */
18287 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18289 unsigned int regno;
18292 regno = REGNO (*p);
18293 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18296 /* Return true when INSN mentions register that must be encoded using REX
18299 x86_extended_reg_mentioned_p (rtx insn)
18301 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18304 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18305 optabs would emit if we didn't have TFmode patterns. */
18308 x86_emit_floatuns (rtx operands[2])
18310 rtx neglab, donelab, i0, i1, f0, in, out;
18311 enum machine_mode mode, inmode;
18313 inmode = GET_MODE (operands[1]);
18314 gcc_assert (inmode == SImode || inmode == DImode);
18317 in = force_reg (inmode, operands[1]);
18318 mode = GET_MODE (out);
18319 neglab = gen_label_rtx ();
18320 donelab = gen_label_rtx ();
18321 i1 = gen_reg_rtx (Pmode);
18322 f0 = gen_reg_rtx (mode);
18324 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18326 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18327 emit_jump_insn (gen_jump (donelab));
18330 emit_label (neglab);
18332 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18333 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18334 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18335 expand_float (f0, i0, 0);
18336 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18338 emit_label (donelab);
18341 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18342 with all elements equal to VAR. Return true if successful. */
18345 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18346 rtx target, rtx val)
18348 enum machine_mode smode, wsmode, wvmode;
18363 val = force_reg (GET_MODE_INNER (mode), val);
18364 x = gen_rtx_VEC_DUPLICATE (mode, val);
18365 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18371 if (TARGET_SSE || TARGET_3DNOW_A)
18373 val = gen_lowpart (SImode, val);
18374 x = gen_rtx_TRUNCATE (HImode, val);
18375 x = gen_rtx_VEC_DUPLICATE (mode, x);
18376 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18398 /* Extend HImode to SImode using a paradoxical SUBREG. */
18399 tmp1 = gen_reg_rtx (SImode);
18400 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18401 /* Insert the SImode value as low element of V4SImode vector. */
18402 tmp2 = gen_reg_rtx (V4SImode);
18403 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18404 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18405 CONST0_RTX (V4SImode),
18407 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18408 /* Cast the V4SImode vector back to a V8HImode vector. */
18409 tmp1 = gen_reg_rtx (V8HImode);
18410 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18411 /* Duplicate the low short through the whole low SImode word. */
18412 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18413 /* Cast the V8HImode vector back to a V4SImode vector. */
18414 tmp2 = gen_reg_rtx (V4SImode);
18415 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18416 /* Replicate the low element of the V4SImode vector. */
18417 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18418 /* Cast the V2SImode back to V8HImode, and store in target. */
18419 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18430 /* Extend QImode to SImode using a paradoxical SUBREG. */
18431 tmp1 = gen_reg_rtx (SImode);
18432 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18433 /* Insert the SImode value as low element of V4SImode vector. */
18434 tmp2 = gen_reg_rtx (V4SImode);
18435 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18436 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18437 CONST0_RTX (V4SImode),
18439 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18440 /* Cast the V4SImode vector back to a V16QImode vector. */
18441 tmp1 = gen_reg_rtx (V16QImode);
18442 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18443 /* Duplicate the low byte through the whole low SImode word. */
18444 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18445 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18446 /* Cast the V16QImode vector back to a V4SImode vector. */
18447 tmp2 = gen_reg_rtx (V4SImode);
18448 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18449 /* Replicate the low element of the V4SImode vector. */
18450 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18451 /* Cast the V2SImode back to V16QImode, and store in target. */
18452 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18460 /* Replicate the value once into the next wider mode and recurse. */
18461 val = convert_modes (wsmode, smode, val, true);
18462 x = expand_simple_binop (wsmode, ASHIFT, val,
18463 GEN_INT (GET_MODE_BITSIZE (smode)),
18464 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18465 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18467 x = gen_reg_rtx (wvmode);
18468 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18469 gcc_unreachable ();
18470 emit_move_insn (target, gen_lowpart (mode, x));
18478 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18479 whose ONE_VAR element is VAR, and other elements are zero. Return true
18483 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18484 rtx target, rtx var, int one_var)
18486 enum machine_mode vsimode;
18502 var = force_reg (GET_MODE_INNER (mode), var);
18503 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18504 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18509 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18510 new_target = gen_reg_rtx (mode);
18512 new_target = target;
18513 var = force_reg (GET_MODE_INNER (mode), var);
18514 x = gen_rtx_VEC_DUPLICATE (mode, var);
18515 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18516 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18519 /* We need to shuffle the value to the correct position, so
18520 create a new pseudo to store the intermediate result. */
18522 /* With SSE2, we can use the integer shuffle insns. */
18523 if (mode != V4SFmode && TARGET_SSE2)
18525 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18527 GEN_INT (one_var == 1 ? 0 : 1),
18528 GEN_INT (one_var == 2 ? 0 : 1),
18529 GEN_INT (one_var == 3 ? 0 : 1)));
18530 if (target != new_target)
18531 emit_move_insn (target, new_target);
18535 /* Otherwise convert the intermediate result to V4SFmode and
18536 use the SSE1 shuffle instructions. */
18537 if (mode != V4SFmode)
18539 tmp = gen_reg_rtx (V4SFmode);
18540 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18545 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18547 GEN_INT (one_var == 1 ? 0 : 1),
18548 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18549 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18551 if (mode != V4SFmode)
18552 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18553 else if (tmp != target)
18554 emit_move_insn (target, tmp);
18556 else if (target != new_target)
18557 emit_move_insn (target, new_target);
18562 vsimode = V4SImode;
18568 vsimode = V2SImode;
18574 /* Zero extend the variable element to SImode and recurse. */
18575 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18577 x = gen_reg_rtx (vsimode);
18578 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18580 gcc_unreachable ();
18582 emit_move_insn (target, gen_lowpart (mode, x));
18590 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18591 consisting of the values in VALS. It is known that all elements
18592 except ONE_VAR are constants. Return true if successful. */
18595 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18596 rtx target, rtx vals, int one_var)
18598 rtx var = XVECEXP (vals, 0, one_var);
18599 enum machine_mode wmode;
18602 const_vec = copy_rtx (vals);
18603 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18604 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18612 /* For the two element vectors, it's just as easy to use
18613 the general case. */
18629 /* There's no way to set one QImode entry easily. Combine
18630 the variable value with its adjacent constant value, and
18631 promote to an HImode set. */
18632 x = XVECEXP (vals, 0, one_var ^ 1);
18635 var = convert_modes (HImode, QImode, var, true);
18636 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18638 x = GEN_INT (INTVAL (x) & 0xff);
18642 var = convert_modes (HImode, QImode, var, true);
18643 x = gen_int_mode (INTVAL (x) << 8, HImode);
18645 if (x != const0_rtx)
18646 var = expand_simple_binop (HImode, IOR, var, x, var,
18647 1, OPTAB_LIB_WIDEN);
18649 x = gen_reg_rtx (wmode);
18650 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18651 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18653 emit_move_insn (target, gen_lowpart (mode, x));
18660 emit_move_insn (target, const_vec);
18661 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18665 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18666 all values variable, and none identical. */
18669 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18670 rtx target, rtx vals)
18672 enum machine_mode half_mode = GET_MODE_INNER (mode);
18673 rtx op0 = NULL, op1 = NULL;
18674 bool use_vec_concat = false;
18680 if (!mmx_ok && !TARGET_SSE)
18686 /* For the two element vectors, we always implement VEC_CONCAT. */
18687 op0 = XVECEXP (vals, 0, 0);
18688 op1 = XVECEXP (vals, 0, 1);
18689 use_vec_concat = true;
18693 half_mode = V2SFmode;
18696 half_mode = V2SImode;
18702 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18703 Recurse to load the two halves. */
18705 op0 = gen_reg_rtx (half_mode);
18706 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18707 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18709 op1 = gen_reg_rtx (half_mode);
18710 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18711 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18713 use_vec_concat = true;
18724 gcc_unreachable ();
18727 if (use_vec_concat)
18729 if (!register_operand (op0, half_mode))
18730 op0 = force_reg (half_mode, op0);
18731 if (!register_operand (op1, half_mode))
18732 op1 = force_reg (half_mode, op1);
18734 emit_insn (gen_rtx_SET (VOIDmode, target,
18735 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18739 int i, j, n_elts, n_words, n_elt_per_word;
18740 enum machine_mode inner_mode;
18741 rtx words[4], shift;
18743 inner_mode = GET_MODE_INNER (mode);
18744 n_elts = GET_MODE_NUNITS (mode);
18745 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18746 n_elt_per_word = n_elts / n_words;
18747 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18749 for (i = 0; i < n_words; ++i)
18751 rtx word = NULL_RTX;
18753 for (j = 0; j < n_elt_per_word; ++j)
18755 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18756 elt = convert_modes (word_mode, inner_mode, elt, true);
18762 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18763 word, 1, OPTAB_LIB_WIDEN);
18764 word = expand_simple_binop (word_mode, IOR, word, elt,
18765 word, 1, OPTAB_LIB_WIDEN);
18773 emit_move_insn (target, gen_lowpart (mode, words[0]));
18774 else if (n_words == 2)
18776 rtx tmp = gen_reg_rtx (mode);
18777 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18778 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18779 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18780 emit_move_insn (target, tmp);
18782 else if (n_words == 4)
18784 rtx tmp = gen_reg_rtx (V4SImode);
18785 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18786 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18787 emit_move_insn (target, gen_lowpart (mode, tmp));
18790 gcc_unreachable ();
18794 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18795 instructions unless MMX_OK is true. */
18798 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18800 enum machine_mode mode = GET_MODE (target);
18801 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18802 int n_elts = GET_MODE_NUNITS (mode);
18803 int n_var = 0, one_var = -1;
18804 bool all_same = true, all_const_zero = true;
18808 for (i = 0; i < n_elts; ++i)
18810 x = XVECEXP (vals, 0, i);
18811 if (!CONSTANT_P (x))
18812 n_var++, one_var = i;
18813 else if (x != CONST0_RTX (inner_mode))
18814 all_const_zero = false;
18815 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18819 /* Constants are best loaded from the constant pool. */
18822 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18826 /* If all values are identical, broadcast the value. */
18828 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18829 XVECEXP (vals, 0, 0)))
18832 /* Values where only one field is non-constant are best loaded from
18833 the pool and overwritten via move later. */
18837 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18838 XVECEXP (vals, 0, one_var),
18842 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18846 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18850 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18852 enum machine_mode mode = GET_MODE (target);
18853 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18854 bool use_vec_merge = false;
18863 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18864 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18866 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18868 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18869 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18879 /* For the two element vectors, we implement a VEC_CONCAT with
18880 the extraction of the other element. */
18882 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18883 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18886 op0 = val, op1 = tmp;
18888 op0 = tmp, op1 = val;
18890 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18891 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18899 use_vec_merge = true;
18903 /* tmp = target = A B C D */
18904 tmp = copy_to_reg (target);
18905 /* target = A A B B */
18906 emit_insn (gen_sse_unpcklps (target, target, target));
18907 /* target = X A B B */
18908 ix86_expand_vector_set (false, target, val, 0);
18909 /* target = A X C D */
18910 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18911 GEN_INT (1), GEN_INT (0),
18912 GEN_INT (2+4), GEN_INT (3+4)));
18916 /* tmp = target = A B C D */
18917 tmp = copy_to_reg (target);
18918 /* tmp = X B C D */
18919 ix86_expand_vector_set (false, tmp, val, 0);
18920 /* target = A B X D */
18921 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18922 GEN_INT (0), GEN_INT (1),
18923 GEN_INT (0+4), GEN_INT (3+4)));
18927 /* tmp = target = A B C D */
18928 tmp = copy_to_reg (target);
18929 /* tmp = X B C D */
18930 ix86_expand_vector_set (false, tmp, val, 0);
18931 /* target = A B X D */
18932 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18933 GEN_INT (0), GEN_INT (1),
18934 GEN_INT (2+4), GEN_INT (0+4)));
18938 gcc_unreachable ();
18943 /* Element 0 handled by vec_merge below. */
18946 use_vec_merge = true;
18952 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18953 store into element 0, then shuffle them back. */
18957 order[0] = GEN_INT (elt);
18958 order[1] = const1_rtx;
18959 order[2] = const2_rtx;
18960 order[3] = GEN_INT (3);
18961 order[elt] = const0_rtx;
18963 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18964 order[1], order[2], order[3]));
18966 ix86_expand_vector_set (false, target, val, 0);
18968 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18969 order[1], order[2], order[3]));
18973 /* For SSE1, we have to reuse the V4SF code. */
18974 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18975 gen_lowpart (SFmode, val), elt);
18980 use_vec_merge = TARGET_SSE2;
18983 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18994 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18995 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18996 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19000 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19002 emit_move_insn (mem, target);
19004 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19005 emit_move_insn (tmp, val);
19007 emit_move_insn (target, mem);
19012 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19014 enum machine_mode mode = GET_MODE (vec);
19015 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19016 bool use_vec_extr = false;
19029 use_vec_extr = true;
19041 tmp = gen_reg_rtx (mode);
19042 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19043 GEN_INT (elt), GEN_INT (elt),
19044 GEN_INT (elt+4), GEN_INT (elt+4)));
19048 tmp = gen_reg_rtx (mode);
19049 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19053 gcc_unreachable ();
19056 use_vec_extr = true;
19071 tmp = gen_reg_rtx (mode);
19072 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19073 GEN_INT (elt), GEN_INT (elt),
19074 GEN_INT (elt), GEN_INT (elt)));
19078 tmp = gen_reg_rtx (mode);
19079 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19083 gcc_unreachable ();
19086 use_vec_extr = true;
19091 /* For SSE1, we have to reuse the V4SF code. */
19092 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19093 gen_lowpart (V4SFmode, vec), elt);
19099 use_vec_extr = TARGET_SSE2;
19102 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19107 /* ??? Could extract the appropriate HImode element and shift. */
19114 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19115 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19117 /* Let the rtl optimizers know about the zero extension performed. */
19118 if (inner_mode == HImode)
19120 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19121 target = gen_lowpart (SImode, target);
19124 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19128 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19130 emit_move_insn (mem, vec);
19132 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19133 emit_move_insn (target, tmp);
19137 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19138 pattern to reduce; DEST is the destination; IN is the input vector. */
19141 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19143 rtx tmp1, tmp2, tmp3;
19145 tmp1 = gen_reg_rtx (V4SFmode);
19146 tmp2 = gen_reg_rtx (V4SFmode);
19147 tmp3 = gen_reg_rtx (V4SFmode);
19149 emit_insn (gen_sse_movhlps (tmp1, in, in));
19150 emit_insn (fn (tmp2, tmp1, in));
19152 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19153 GEN_INT (1), GEN_INT (1),
19154 GEN_INT (1+4), GEN_INT (1+4)));
19155 emit_insn (fn (dest, tmp2, tmp3));
19158 /* Target hook for scalar_mode_supported_p. */
19160 ix86_scalar_mode_supported_p (enum machine_mode mode)
19162 if (DECIMAL_FLOAT_MODE_P (mode))
19165 return default_scalar_mode_supported_p (mode);
19168 /* Implements target hook vector_mode_supported_p. */
19170 ix86_vector_mode_supported_p (enum machine_mode mode)
19172 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19174 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19176 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19178 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19183 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19185 We do this in the new i386 backend to maintain source compatibility
19186 with the old cc0-based compiler. */
19189 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19190 tree inputs ATTRIBUTE_UNUSED,
19193 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19195 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19197 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19202 /* Return true if this goes in small data/bss. */
19205 ix86_in_large_data_p (tree exp)
19207 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19210 /* Functions are never large data. */
19211 if (TREE_CODE (exp) == FUNCTION_DECL)
19214 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19216 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19217 if (strcmp (section, ".ldata") == 0
19218 || strcmp (section, ".lbss") == 0)
19224 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19226 /* If this is an incomplete type with size 0, then we can't put it
19227 in data because it might be too big when completed. */
19228 if (!size || size > ix86_section_threshold)
19235 ix86_encode_section_info (tree decl, rtx rtl, int first)
19237 default_encode_section_info (decl, rtl, first);
19239 if (TREE_CODE (decl) == VAR_DECL
19240 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19241 && ix86_in_large_data_p (decl))
19242 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19245 /* Worker function for REVERSE_CONDITION. */
19248 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19250 return (mode != CCFPmode && mode != CCFPUmode
19251 ? reverse_condition (code)
19252 : reverse_condition_maybe_unordered (code));
19255 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19259 output_387_reg_move (rtx insn, rtx *operands)
19261 if (REG_P (operands[1])
19262 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19264 if (REGNO (operands[0]) == FIRST_STACK_REG)
19265 return output_387_ffreep (operands, 0);
19266 return "fstp\t%y0";
19268 if (STACK_TOP_P (operands[0]))
19269 return "fld%z1\t%y1";
19273 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19274 FP status register is set. */
19277 ix86_emit_fp_unordered_jump (rtx label)
19279 rtx reg = gen_reg_rtx (HImode);
19282 emit_insn (gen_x86_fnstsw_1 (reg));
19284 if (TARGET_USE_SAHF)
19286 emit_insn (gen_x86_sahf_1 (reg));
19288 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19289 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19293 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19295 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19296 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19299 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19300 gen_rtx_LABEL_REF (VOIDmode, label),
19302 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19303 emit_jump_insn (temp);
19306 /* Output code to perform a log1p XFmode calculation. */
19308 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19310 rtx label1 = gen_label_rtx ();
19311 rtx label2 = gen_label_rtx ();
19313 rtx tmp = gen_reg_rtx (XFmode);
19314 rtx tmp2 = gen_reg_rtx (XFmode);
19316 emit_insn (gen_absxf2 (tmp, op1));
19317 emit_insn (gen_cmpxf (tmp,
19318 CONST_DOUBLE_FROM_REAL_VALUE (
19319 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19321 emit_jump_insn (gen_bge (label1));
19323 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19324 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19325 emit_jump (label2);
19327 emit_label (label1);
19328 emit_move_insn (tmp, CONST1_RTX (XFmode));
19329 emit_insn (gen_addxf3 (tmp, op1, tmp));
19330 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19331 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19333 emit_label (label2);
19336 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19339 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19342 /* With Binutils 2.15, the "@unwind" marker must be specified on
19343 every occurrence of the ".eh_frame" section, not just the first
19346 && strcmp (name, ".eh_frame") == 0)
19348 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19349 flags & SECTION_WRITE ? "aw" : "a");
19352 default_elf_asm_named_section (name, flags, decl);
19355 /* Return the mangling of TYPE if it is an extended fundamental type. */
19357 static const char *
19358 ix86_mangle_fundamental_type (tree type)
19360 switch (TYPE_MODE (type))
19363 /* __float128 is "g". */
19366 /* "long double" or __float80 is "e". */
19373 /* For 32-bit code we can save PIC register setup by using
19374 __stack_chk_fail_local hidden function instead of calling
19375 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19376 register, so it is better to call __stack_chk_fail directly. */
19379 ix86_stack_protect_fail (void)
19381 return TARGET_64BIT
19382 ? default_external_stack_protect_fail ()
19383 : default_hidden_stack_protect_fail ();
19386 /* Select a format to encode pointers in exception handling data. CODE
19387 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19388 true if the symbol may be affected by dynamic relocations.
19390 ??? All x86 object file formats are capable of representing this.
19391 After all, the relocation needed is the same as for the call insn.
19392 Whether or not a particular assembler allows us to enter such, I
19393 guess we'll have to see. */
19395 asm_preferred_eh_data_format (int code, int global)
19399 int type = DW_EH_PE_sdata8;
19401 || ix86_cmodel == CM_SMALL_PIC
19402 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19403 type = DW_EH_PE_sdata4;
19404 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19406 if (ix86_cmodel == CM_SMALL
19407 || (ix86_cmodel == CM_MEDIUM && code))
19408 return DW_EH_PE_udata4;
19409 return DW_EH_PE_absptr;
19412 /* Expand copysign from SIGN to the positive value ABS_VALUE
19413 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
19416 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
19418 enum machine_mode mode = GET_MODE (sign);
19419 rtx sgn = gen_reg_rtx (mode);
19420 if (mask == NULL_RTX)
19422 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
19423 if (!VECTOR_MODE_P (mode))
19425 /* We need to generate a scalar mode mask in this case. */
19426 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19427 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19428 mask = gen_reg_rtx (mode);
19429 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19433 mask = gen_rtx_NOT (mode, mask);
19434 emit_insn (gen_rtx_SET (VOIDmode, sgn,
19435 gen_rtx_AND (mode, mask, sign)));
19436 emit_insn (gen_rtx_SET (VOIDmode, result,
19437 gen_rtx_IOR (mode, abs_value, sgn)));
19440 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19441 mask for masking out the sign-bit is stored in *SMASK, if that is
19444 ix86_expand_sse_fabs (rtx op0, rtx *smask)
19446 enum machine_mode mode = GET_MODE (op0);
19449 xa = gen_reg_rtx (mode);
19450 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
19451 if (!VECTOR_MODE_P (mode))
19453 /* We need to generate a scalar mode mask in this case. */
19454 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19455 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19456 mask = gen_reg_rtx (mode);
19457 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19459 emit_insn (gen_rtx_SET (VOIDmode, xa,
19460 gen_rtx_AND (mode, op0, mask)));
19468 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19469 swapping the operands if SWAP_OPERANDS is true. The expanded
19470 code is a forward jump to a newly created label in case the
19471 comparison is true. The generated label rtx is returned. */
19473 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
19474 bool swap_operands)
19485 label = gen_label_rtx ();
19486 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
19487 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19488 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
19489 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
19490 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19491 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
19492 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19493 JUMP_LABEL (tmp) = label;
19498 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19499 using comparison code CODE. Operands are swapped for the comparison if
19500 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19502 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
19503 bool swap_operands)
19505 enum machine_mode mode = GET_MODE (op0);
19506 rtx mask = gen_reg_rtx (mode);
19515 if (mode == DFmode)
19516 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
19517 gen_rtx_fmt_ee (code, mode, op0, op1)));
19519 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
19520 gen_rtx_fmt_ee (code, mode, op0, op1)));
19525 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19526 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
19528 ix86_gen_TWO52 (enum machine_mode mode)
19530 REAL_VALUE_TYPE TWO52r;
19533 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19534 TWO52 = const_double_from_real_value (TWO52r, mode);
19535 TWO52 = force_reg (mode, TWO52);
19540 /* Expand SSE sequence for computing lround from OP1 storing
19543 ix86_expand_lround (rtx op0, rtx op1)
19545 /* C code for the stuff we're doing below:
19546 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19549 enum machine_mode mode = GET_MODE (op1);
19550 const struct real_format *fmt;
19551 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19554 /* load nextafter (0.5, 0.0) */
19555 fmt = REAL_MODE_FORMAT (mode);
19556 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19557 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19559 /* adj = copysign (0.5, op1) */
19560 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
19561 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
19563 /* adj = op1 + adj */
19564 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
19566 /* op0 = (imode)adj */
19567 expand_fix (op0, adj, 0);
19570 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
19573 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
19575 /* C code for the stuff we're doing below (for do_floor):
19577 xi -= (double)xi > op1 ? 1 : 0;
19580 enum machine_mode fmode = GET_MODE (op1);
19581 enum machine_mode imode = GET_MODE (op0);
19582 rtx ireg, freg, label, tmp;
19584 /* reg = (long)op1 */
19585 ireg = gen_reg_rtx (imode);
19586 expand_fix (ireg, op1, 0);
19588 /* freg = (double)reg */
19589 freg = gen_reg_rtx (fmode);
19590 expand_float (freg, ireg, 0);
19592 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19593 label = ix86_expand_sse_compare_and_jump (UNLE,
19594 freg, op1, !do_floor);
19595 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
19596 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
19597 emit_move_insn (ireg, tmp);
19599 emit_label (label);
19600 LABEL_NUSES (label) = 1;
19602 emit_move_insn (op0, ireg);
19605 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
19606 result in OPERAND0. */
19608 ix86_expand_rint (rtx operand0, rtx operand1)
19610 /* C code for the stuff we're doing below:
19611 xa = fabs (operand1);
19612 if (!isless (xa, 2**52))
19614 xa = xa + 2**52 - 2**52;
19615 return copysign (xa, operand1);
19617 enum machine_mode mode = GET_MODE (operand0);
19618 rtx res, xa, label, TWO52, mask;
19620 res = gen_reg_rtx (mode);
19621 emit_move_insn (res, operand1);
19623 /* xa = abs (operand1) */
19624 xa = ix86_expand_sse_fabs (res, &mask);
19626 /* if (!isless (xa, TWO52)) goto label; */
19627 TWO52 = ix86_gen_TWO52 (mode);
19628 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19630 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19631 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19633 ix86_sse_copysign_to_positive (res, xa, res, mask);
19635 emit_label (label);
19636 LABEL_NUSES (label) = 1;
19638 emit_move_insn (operand0, res);
19641 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19644 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
19646 /* C code for the stuff we expand below.
19647 double xa = fabs (x), x2;
19648 if (!isless (xa, TWO52))
19650 xa = xa + TWO52 - TWO52;
19651 x2 = copysign (xa, x);
19660 enum machine_mode mode = GET_MODE (operand0);
19661 rtx xa, TWO52, tmp, label, one, res, mask;
19663 TWO52 = ix86_gen_TWO52 (mode);
19665 /* Temporary for holding the result, initialized to the input
19666 operand to ease control flow. */
19667 res = gen_reg_rtx (mode);
19668 emit_move_insn (res, operand1);
19670 /* xa = abs (operand1) */
19671 xa = ix86_expand_sse_fabs (res, &mask);
19673 /* if (!isless (xa, TWO52)) goto label; */
19674 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19676 /* xa = xa + TWO52 - TWO52; */
19677 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19678 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19680 /* xa = copysign (xa, operand1) */
19681 ix86_sse_copysign_to_positive (xa, xa, res, mask);
19683 /* generate 1.0 or -1.0 */
19684 one = force_reg (mode,
19685 const_double_from_real_value (do_floor
19686 ? dconst1 : dconstm1, mode));
19688 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19689 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19690 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19691 gen_rtx_AND (mode, one, tmp)));
19692 /* We always need to subtract here to preserve signed zero. */
19693 tmp = expand_simple_binop (mode, MINUS,
19694 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19695 emit_move_insn (res, tmp);
19697 emit_label (label);
19698 LABEL_NUSES (label) = 1;
19700 emit_move_insn (operand0, res);
19703 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19706 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
19708 /* C code for the stuff we expand below.
19709 double xa = fabs (x), x2;
19710 if (!isless (xa, TWO52))
19712 x2 = (double)(long)x;
19719 if (HONOR_SIGNED_ZEROS (mode))
19720 return copysign (x2, x);
19723 enum machine_mode mode = GET_MODE (operand0);
19724 rtx xa, xi, TWO52, tmp, label, one, res, mask;
19726 TWO52 = ix86_gen_TWO52 (mode);
19728 /* Temporary for holding the result, initialized to the input
19729 operand to ease control flow. */
19730 res = gen_reg_rtx (mode);
19731 emit_move_insn (res, operand1);
19733 /* xa = abs (operand1) */
19734 xa = ix86_expand_sse_fabs (res, &mask);
19736 /* if (!isless (xa, TWO52)) goto label; */
19737 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19739 /* xa = (double)(long)x */
19740 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19741 expand_fix (xi, res, 0);
19742 expand_float (xa, xi, 0);
19745 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19747 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19748 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19749 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19750 gen_rtx_AND (mode, one, tmp)));
19751 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19752 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19753 emit_move_insn (res, tmp);
19755 if (HONOR_SIGNED_ZEROS (mode))
19756 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19758 emit_label (label);
19759 LABEL_NUSES (label) = 1;
19761 emit_move_insn (operand0, res);
19764 /* Expand SSE sequence for computing round from OPERAND1 storing
19765 into OPERAND0. Sequence that works without relying on DImode truncation
19766 via cvttsd2siq that is only available on 64bit targets. */
19768 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
19770 /* C code for the stuff we expand below.
19771 double xa = fabs (x), xa2, x2;
19772 if (!isless (xa, TWO52))
19774 Using the absolute value and copying back sign makes
19775 -0.0 -> -0.0 correct.
19776 xa2 = xa + TWO52 - TWO52;
19781 else if (dxa > 0.5)
19783 x2 = copysign (xa2, x);
19786 enum machine_mode mode = GET_MODE (operand0);
19787 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
19789 TWO52 = ix86_gen_TWO52 (mode);
19791 /* Temporary for holding the result, initialized to the input
19792 operand to ease control flow. */
19793 res = gen_reg_rtx (mode);
19794 emit_move_insn (res, operand1);
19796 /* xa = abs (operand1) */
19797 xa = ix86_expand_sse_fabs (res, &mask);
19799 /* if (!isless (xa, TWO52)) goto label; */
19800 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19802 /* xa2 = xa + TWO52 - TWO52; */
19803 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19804 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
19806 /* dxa = xa2 - xa; */
19807 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
19809 /* generate 0.5, 1.0 and -0.5 */
19810 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
19811 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
19812 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
19816 tmp = gen_reg_rtx (mode);
19817 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19818 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
19819 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19820 gen_rtx_AND (mode, one, tmp)));
19821 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19822 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19823 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
19824 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19825 gen_rtx_AND (mode, one, tmp)));
19826 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
19828 /* res = copysign (xa2, operand1) */
19829 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
19831 emit_label (label);
19832 LABEL_NUSES (label) = 1;
19834 emit_move_insn (operand0, res);
19837 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19840 ix86_expand_trunc (rtx operand0, rtx operand1)
19842 /* C code for SSE variant we expand below.
19843 double xa = fabs (x), x2;
19844 if (!isless (xa, TWO52))
19846 x2 = (double)(long)x;
19847 if (HONOR_SIGNED_ZEROS (mode))
19848 return copysign (x2, x);
19851 enum machine_mode mode = GET_MODE (operand0);
19852 rtx xa, xi, TWO52, label, res, mask;
19854 TWO52 = ix86_gen_TWO52 (mode);
19856 /* Temporary for holding the result, initialized to the input
19857 operand to ease control flow. */
19858 res = gen_reg_rtx (mode);
19859 emit_move_insn (res, operand1);
19861 /* xa = abs (operand1) */
19862 xa = ix86_expand_sse_fabs (res, &mask);
19864 /* if (!isless (xa, TWO52)) goto label; */
19865 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19867 /* x = (double)(long)x */
19868 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19869 expand_fix (xi, res, 0);
19870 expand_float (res, xi, 0);
19872 if (HONOR_SIGNED_ZEROS (mode))
19873 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
19875 emit_label (label);
19876 LABEL_NUSES (label) = 1;
19878 emit_move_insn (operand0, res);
19881 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19884 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
19886 enum machine_mode mode = GET_MODE (operand0);
19887 rtx xa, mask, TWO52, label, one, res, smask, tmp;
19889 /* C code for SSE variant we expand below.
19890 double xa = fabs (x), x2;
19891 if (!isless (xa, TWO52))
19893 xa2 = xa + TWO52 - TWO52;
19897 x2 = copysign (xa2, x);
19901 TWO52 = ix86_gen_TWO52 (mode);
19903 /* Temporary for holding the result, initialized to the input
19904 operand to ease control flow. */
19905 res = gen_reg_rtx (mode);
19906 emit_move_insn (res, operand1);
19908 /* xa = abs (operand1) */
19909 xa = ix86_expand_sse_fabs (res, &smask);
19911 /* if (!isless (xa, TWO52)) goto label; */
19912 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19914 /* res = xa + TWO52 - TWO52; */
19915 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
19916 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
19917 emit_move_insn (res, tmp);
19920 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19922 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
19923 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
19924 emit_insn (gen_rtx_SET (VOIDmode, mask,
19925 gen_rtx_AND (mode, mask, one)));
19926 tmp = expand_simple_binop (mode, MINUS,
19927 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
19928 emit_move_insn (res, tmp);
19930 /* res = copysign (res, operand1) */
19931 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
19933 emit_label (label);
19934 LABEL_NUSES (label) = 1;
19936 emit_move_insn (operand0, res);
19939 /* Expand SSE sequence for computing round from OPERAND1 storing
19942 ix86_expand_round (rtx operand0, rtx operand1)
19944 /* C code for the stuff we're doing below:
19945 double xa = fabs (x);
19946 if (!isless (xa, TWO52))
19948 xa = (double)(long)(xa + nextafter (0.5, 0.0));
19949 return copysign (xa, x);
19951 enum machine_mode mode = GET_MODE (operand0);
19952 rtx res, TWO52, xa, label, xi, half, mask;
19953 const struct real_format *fmt;
19954 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19956 /* Temporary for holding the result, initialized to the input
19957 operand to ease control flow. */
19958 res = gen_reg_rtx (mode);
19959 emit_move_insn (res, operand1);
19961 TWO52 = ix86_gen_TWO52 (mode);
19962 xa = ix86_expand_sse_fabs (res, &mask);
19963 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19965 /* load nextafter (0.5, 0.0) */
19966 fmt = REAL_MODE_FORMAT (mode);
19967 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19968 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19970 /* xa = xa + 0.5 */
19971 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
19972 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
19974 /* xa = (double)(int64_t)xa */
19975 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19976 expand_fix (xi, xa, 0);
19977 expand_float (xa, xi, 0);
19979 /* res = copysign (xa, operand1) */
19980 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
19982 emit_label (label);
19983 LABEL_NUSES (label) = 1;
19985 emit_move_insn (operand0, res);
19988 #include "gt-i386.h"